From fe6244eeb5a9077a778f41ee5ab61c7f364cfd63 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Fri, 23 Oct 2020 14:03:36 -0700 Subject: [PATCH 1/8] memoise collect(), #4 --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/utils-classes.R | 54 ++++++++++++++++++++++++----------------------- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b86444c5..207f0d57 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,8 @@ Imports: sf (>= 0.7), tidyselect (>= 0.2.5), utils, - xml2 + xml2, + memoise (>= 1.1.0) Suggests: covr, ggplot2, diff --git a/NAMESPACE b/NAMESPACE index 88d18d9f..2874c6c2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ importFrom(dplyr,show_query) importFrom(dplyr,sql_escape_ident) importFrom(dplyr,sql_escape_string) importFrom(dplyr,sql_translate_env) +importFrom(memoise,memoise) importFrom(methods,setOldClass) importFrom(readr,read_csv) importFrom(readr,read_tsv) diff --git a/R/utils-classes.R b/R/utils-classes.R index e20348d2..42631b20 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -325,32 +325,7 @@ mutate.bcdc_promise <- function(.data, ...){ mutate({dots}) "), call. = FALSE) } - -#' Force collection of Web Service request from B.C. Data Catalogue -#' -#' After tuning a query, `collect()` is used to actually bring the data into memory. -#' This will retrieve an sf object into R. The `as_tibble()` function can be used -#' interchangeably with `collect` which matches `dbplyr` behaviour. 
-#' -#' @param x object of class bcdc_promise -#' @inheritParams collect -#' @rdname collect-methods -#' @export -#' -#' @examples -#' \donttest{ -#' try( -#' bcdc_query_geodata("bc-airports") %>% -#' collect() -#' ) -#' -#' try( -#' bcdc_query_geodata("bc-airports") %>% -#' as_tibble() -#' ) -#' } -#' -collect.bcdc_promise <- function(x, ...){ +collect_bcdc_promise_ <- function(x, ...){ check_chunk_limit() x$query_list$CQL_FILTER <- finalize_cql(x$query_list$CQL_FILTER) @@ -411,6 +386,33 @@ collect.bcdc_promise <- function(x, ...){ } +#' Force collection of Web Service request from B.C. Data Catalogue +#' +#' After tuning a query, `collect()` is used to actually bring the data into memory. +#' This will retrieve an sf object into R. The `as_tibble()` function can be used +#' interchangeably with `collect` which matches `dbplyr` behaviour. +#' +#' @param x object of class bcdc_promise +#' @importFrom memoise memoise +#' @inheritParams collect +#' @rdname collect-methods +#' @export +#' +#' @examples +#' \donttest{ +#' try( +#' bcdc_query_geodata("bc-airports") %>% +#' collect() +#' ) +#' +#' try( +#' bcdc_query_geodata("bc-airports") %>% +#' as_tibble() +#' ) +#' } +#' +collect.bcdc_promise <- memoise(collect_bcdc_promise_) + #' @inheritParams collect.bcdc_promise #' @rdname collect-methods From d83cc3ad7dc1223ef433a5643b0b6795d4efb3db Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Fri, 23 Oct 2020 15:25:20 -0700 Subject: [PATCH 2/8] Implement caching on disk, with a timeout and a forget function to clear the cache. 
#4 --- DESCRIPTION | 3 ++- NAMESPACE | 3 ++- R/utils-classes.R | 42 +++++++++++++++++++++++++++++++++++------- man/bcdc_forget.Rd | 17 +++++++++++++++++ man/collect-methods.Rd | 21 ++++++++++++++++++--- 5 files changed, 74 insertions(+), 12 deletions(-) create mode 100644 man/bcdc_forget.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 207f0d57..fd419134 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,7 +50,8 @@ Imports: tidyselect (>= 0.2.5), utils, xml2, - memoise (>= 1.1.0) + memoise (>= 1.1.0), + rappdirs (>= 0.3.1) Suggests: covr, ggplot2, diff --git a/NAMESPACE b/NAMESPACE index 2874c6c2..e2134bee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ export(WITHIN) export(as_tibble) export(bcdc_browse) export(bcdc_describe_feature) +export(bcdc_forget) export(bcdc_get_data) export(bcdc_get_record) export(bcdc_list) @@ -65,6 +66,7 @@ exportClasses(wfsConnection) exportMethods(dbQuoteIdentifier) exportMethods(dbQuoteString) import(DBI) +import(memoise) import(methods) importFrom(cli,cat_bullet) importFrom(cli,cat_line) @@ -81,7 +83,6 @@ importFrom(dplyr,show_query) importFrom(dplyr,sql_escape_ident) importFrom(dplyr,sql_escape_string) importFrom(dplyr,sql_translate_env) -importFrom(memoise,memoise) importFrom(methods,setOldClass) importFrom(readr,read_csv) importFrom(readr,read_tsv) diff --git a/R/utils-classes.R b/R/utils-classes.R index 42631b20..ebcac963 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -386,14 +386,29 @@ collect_bcdc_promise_ <- function(x, ...){ } -#' Force collection of Web Service request from B.C. Data Catalogue +#' Force collection of Web Service request from B.C. Data +#' Catalogue #' -#' After tuning a query, `collect()` is used to actually bring the data into memory. -#' This will retrieve an sf object into R. The `as_tibble()` function can be used -#' interchangeably with `collect` which matches `dbplyr` behaviour. +#' After tuning a query, `collect()` is used to actually +#' bring the data into memory. 
This will retrieve an sf +#' object into R. The `as_tibble()` function can be used +#' interchangeably with `collect` which matches `dbplyr` +#' behaviour. +#' +#' The result of `collect()`-ing a query will be cached to +#' avoid repeatedly requesting the same data from the +#' server. The duration of the caching can be customized +#' by setting the option `bcdc_cache_timeout` to a +#' different value (in seconds). The default is one hour +#' (3600 seconds). +#' +#' The cache can be cleared by running [bcdc_forget()]. +#' Note this will clear the cache for all `collect()` +#' calls in the previous time frame specified in the +#' `bcdc_cache_timeout` option. #' #' @param x object of class bcdc_promise -#' @importFrom memoise memoise +#' @import memoise #' @inheritParams collect #' @rdname collect-methods #' @export @@ -411,8 +426,21 @@ collect_bcdc_promise_ <- function(x, ...){ #' ) #' } #' -collect.bcdc_promise <- memoise(collect_bcdc_promise_) - +collect.bcdc_promise <- memoise( + collect_bcdc_promise_, + ~ timeout(getOption("bcdc_cache_timeout", 3600)), # 1 hour + cache = cache_filesystem(rappdirs::user_data_dir("bcdata")) +) + +#' Forget (clear) the cache of objects returned by +#' [collect()] +#' +#' @return `TRUE` if the cache existed previously and was +#' successfully cleared, otherwise `FALSE`. +#' @export +bcdc_forget <- function() { + memoise::forget(collect.bcdc_promise) +} #' @inheritParams collect.bcdc_promise #' @rdname collect-methods diff --git a/man/bcdc_forget.Rd b/man/bcdc_forget.Rd new file mode 100644 index 00000000..6277a04a --- /dev/null +++ b/man/bcdc_forget.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-classes.R +\name{bcdc_forget} +\alias{bcdc_forget} +\title{Forget (clear) the cache of objects returned by +\code{\link[=collect]{collect()}}} +\usage{ +bcdc_forget() +} +\value{ +\code{TRUE} if the cache existed previously and was +successfully cleared, otherwise \code{FALSE}. 
+} +\description{ +Forget (clear) the cache of objects returned by +\code{\link[=collect]{collect()}} +} diff --git a/man/collect-methods.Rd b/man/collect-methods.Rd index 0bc8fbb0..c6884ac1 100644 --- a/man/collect-methods.Rd +++ b/man/collect-methods.Rd @@ -18,12 +18,27 @@ \description{ See \code{tibble::\link[tibble]{as_tibble}} for details. -After tuning a query, \code{collect()} is used to actually bring the data into memory. -This will retrieve an sf object into R. The \code{as_tibble()} function can be used -interchangeably with \code{collect} which matches \code{dbplyr} behaviour. +After tuning a query, \code{collect()} is used to actually +bring the data into memory. This will retrieve an sf +object into R. The \code{as_tibble()} function can be used +interchangeably with \code{collect} which matches \code{dbplyr} +behaviour. See \code{dplyr::\link[dplyr:compute]{collect}} for details. } +\details{ +The result of \code{collect()}-ing a query will be cached to +avoid repeatedly requesting the same data from the +server. The duration of the caching can be customized +by setting the option \code{bcdc_cache_timeout} to a +different value (in seconds). The default is one hour +(3600 seconds). + +The cache can be cleared by running \code{\link[=bcdc_forget]{bcdc_forget()}}. +Note this will clear the cache for all \code{collect()} +calls in the previous time frame specified in the +\code{bcdc_cache_timeout} option. 
+} \examples{ \donttest{ try( From a333db8aaabcdbb1018a06f776664ed6287d9e49 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Fri, 23 Oct 2020 15:36:14 -0700 Subject: [PATCH 3/8] use user_cache_dir over user_data_dir --- R/utils-classes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-classes.R b/R/utils-classes.R index ebcac963..423e4b5b 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -429,7 +429,7 @@ collect_bcdc_promise_ <- function(x, ...){ collect.bcdc_promise <- memoise( collect_bcdc_promise_, ~ timeout(getOption("bcdc_cache_timeout", 3600)), # 1 hour - cache = cache_filesystem(rappdirs::user_data_dir("bcdata")) + cache = cache_filesystem(rappdirs::user_cache_dir("bcdata")) ) #' Forget (clear) the cache of objects returned by From 2db02de9d8fb6655c1637b5c2352f3b025f57fec Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Fri, 23 Oct 2020 16:06:18 -0700 Subject: [PATCH 4/8] Add caching options, #4 --- NAMESPACE | 2 ++ R/bcdc_options.R | 12 ++++++++++-- R/utils-classes.R | 25 +++++++++++++++++++++++-- man/bcdc_cache_path.Rd | 13 +++++++++++++ man/bcdc_cache_timeout.Rd | 12 ++++++++++++ man/bcdc_options.Rd | 6 ++++++ 6 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 man/bcdc_cache_path.Rd create mode 100644 man/bcdc_cache_timeout.Rd diff --git a/NAMESPACE b/NAMESPACE index e2134bee..433a5c66 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ export(TOUCHES) export(WITHIN) export(as_tibble) export(bcdc_browse) +export(bcdc_cache_path) +export(bcdc_cache_timeout) export(bcdc_describe_feature) export(bcdc_forget) export(bcdc_get_data) diff --git a/R/bcdc_options.R b/R/bcdc_options.R index 8132efc0..0946c942 100644 --- a/R/bcdc_options.R +++ b/R/bcdc_options.R @@ -29,6 +29,12 @@ #' requested. On faster internet connections, a bigger chunk limit could be useful while on slower connections, #' it is advisable to lower the chunk limit. Chunks must be less than 10000. 
#' +#' `bcdata.cache_path` is the location on your computer where results from web requests are cached. The default +#' is set by [rappdirs::user_cache_dir()] via [bcdc_cache_path()]. +#' +#' `bcdata.cache_timeout` is the time, in seconds, that the cache is maintained. Default is +#' 3600 seconds (one hour). +#' #' @examples #' \donttest{ #' ## Save initial conditions @@ -64,8 +70,10 @@ bcdc_options <- function() { dplyr::tribble( ~ option, ~ value, ~default, - "bcdata.max_geom_pred_size", null_to_na(getOption("bcdata.max_geom_pred_size")), 5E5, - "bcdata.chunk_limit",null_to_na(getOption("bcdata.chunk_limit")), 1000 + "bcdata.max_geom_pred_size", null_to_na(getOption("bcdata.max_geom_pred_size")), as.character(5E5), + "bcdata.chunk_limit",null_to_na(getOption("bcdata.chunk_limit")), as.character(1000), + "bcdata.cache_path",null_to_na(getOption("bcdata.cache_path")), rappdirs::user_cache_dir("bcdata"), + "bcdata.cache_timeout",null_to_na(getOption("bcdata.cache_timeout")), as.character(3600) ) } diff --git a/R/utils-classes.R b/R/utils-classes.R index 423e4b5b..b27b309f 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -386,6 +386,27 @@ collect_bcdc_promise_ <- function(x, ...){ } +#' Retrieve Default Cache timeout +#' +#' Retrieves the length of time that a cache of [collect()]ed +#' web resources is kept. Default is 1 hour (3600 seconds). +#' +#' @export +bcdc_cache_timeout <- function() { + getOption("bcdata.cache_timeout", 3600) +} + +#' Retrieve Default Cache Path +#' +#' Retrieves the default path used to cache the result of web requests. Makes +#' use of the \code{rappdirs} package to use cache folders +#' defined by each operating system +#' +#' @export +bcdc_cache_path <- function() { + getOption("bcdata.cache_path", rappdirs::user_cache_dir("bcdata")) +} + #' Force collection of Web Service request from B.C.
Data #' Catalogue #' @@ -428,8 +449,8 @@ collect_bcdc_promise_ <- function(x, ...){ #' collect.bcdc_promise <- memoise( collect_bcdc_promise_, - ~ timeout(getOption("bcdc_cache_timeout", 3600)), # 1 hour - cache = cache_filesystem(rappdirs::user_cache_dir("bcdata")) + ~ timeout(bcdc_cache_timeout()), # 1 hour + cache = cache_filesystem(bcdc_cache_path()) ) #' Forget (clear) the cache of objects returned by diff --git a/man/bcdc_cache_path.Rd b/man/bcdc_cache_path.Rd new file mode 100644 index 00000000..70a22b96 --- /dev/null +++ b/man/bcdc_cache_path.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-classes.R +\name{bcdc_cache_path} +\alias{bcdc_cache_path} +\title{Retrieve Default Cache Path} +\usage{ +bcdc_cache_path() +} +\description{ +Retrieves the default path used to cache the result of web requests. Makes +use of the \code{rappdirs} package to use cache folders +defined by each operating system +} diff --git a/man/bcdc_cache_timeout.Rd b/man/bcdc_cache_timeout.Rd new file mode 100644 index 00000000..cdad94b7 --- /dev/null +++ b/man/bcdc_cache_timeout.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-classes.R +\name{bcdc_cache_timeout} +\alias{bcdc_cache_timeout} +\title{Retrieve Default Cache timeout} +\usage{ +bcdc_cache_timeout() +} +\description{ +Retrieves the length of time that a cache of \code{\link[=collect]{collect()}}ed +web resources is kept. Default is 1 hour (3600 seconds). +} diff --git a/man/bcdc_options.Rd b/man/bcdc_options.Rd index 6e9b2ba1..a250373a 100644 --- a/man/bcdc_options.Rd +++ b/man/bcdc_options.Rd @@ -24,6 +24,12 @@ from the catalogue, the request is broken up into smaller chunks which are then been downloaded. bcdata does this all for you but using this option you can set the size of the chunk requested.
On faster internet connections, a bigger chunk limit could be useful while on slower connections, it is advisable to lower the chunk limit. Chunks must be less than 10000. + +\code{bcdata.cache_path} is the location on your computer where results from web requests are cached. The default +is set by \code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} via \code{\link[=bcdc_cache_path]{bcdc_cache_path()}}. + +\code{bcdata.cache_timeout} is the time, in seconds, that the cache is maintained. Default is +3600 seconds (one hour). } \examples{ \donttest{ From 112f315c2e78cbc5944388e2c3a3b8697a0d500b Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 26 Oct 2020 14:19:16 -0700 Subject: [PATCH 5/8] Try to set cache path in actions cmd-check.yaml --- .github/workflows/cmd-check.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index f553284c..429ce099 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -93,7 +93,9 @@ jobs: shell: Rscript {0} - name: Check - run: rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, error_on = 'warning', check_dir = 'check') + run: | + options(bcdata.cache_path = file.path(Sys.getenv("GITHUB_WORKSPACE"), "bcdata_cache")) + rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, error_on = 'warning', check_dir = 'check') shell: Rscript {0} - name: Upload check results From ec14a309a398856cd95d284a2bfffd9f1f70df88 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 26 Oct 2020 16:20:24 -0700 Subject: [PATCH 6/8] Try withr::with_options to change cache path for R CMD check on GH Actions --- .github/workflows/cmd-check.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index 429ce099..d3a9d1b9 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -82,7 +82,7 @@ jobs: - 
name: Install dependencies run: | remotes::install_deps(dependencies = TRUE, configure.args = c('sf' = '${{ matrix.config.sf_args }}')) - install.packages("rcmdcheck") + install.packages(c("rcmdcheck, "withr")) shell: Rscript {0} - name: install dev version of dbplyr to test for upcoming 2.0 release @@ -94,8 +94,11 @@ jobs: - name: Check run: | - options(bcdata.cache_path = file.path(Sys.getenv("GITHUB_WORKSPACE"), "bcdata_cache")) - rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, error_on = 'warning', check_dir = 'check') + withr::with_options( + list(bcdata.cache_path = file.path(Sys.getenv("GITHUB_WORKSPACE"), "bcdata_cache")), + rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, + error_on = 'warning', check_dir = 'check') + ) shell: Rscript {0} - name: Upload check results From ba121702cf18eda1a74f35c7b237b6716d5357b8 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 26 Oct 2020 16:28:30 -0700 Subject: [PATCH 7/8] typo, missing quote --- .github/workflows/cmd-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index d3a9d1b9..f74ec7fb 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -82,7 +82,7 @@ jobs: - name: Install dependencies run: | remotes::install_deps(dependencies = TRUE, configure.args = c('sf' = '${{ matrix.config.sf_args }}')) - install.packages(c("rcmdcheck, "withr")) + install.packages(c("rcmdcheck", "withr")) shell: Rscript {0} - name: install dev version of dbplyr to test for upcoming 2.0 release From 88b9527e8f6b37d443ca53db3a57f358a6917479 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 26 Oct 2020 17:49:01 -0700 Subject: [PATCH 8/8] set options before pkg load/check; add hidden option for cache verbosity --- .github/workflows/cmd-check.yaml | 9 ++--- R/bcdc_options.R | 57 +++++++++++++++++++++----------- R/utils-classes.R | 9 ++++- man/bcdc_options.Rd | 57 +++++++++++++++++++++----------- 
4 files changed, 87 insertions(+), 45 deletions(-) diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index f74ec7fb..428913be 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -82,7 +82,7 @@ jobs: - name: Install dependencies run: | remotes::install_deps(dependencies = TRUE, configure.args = c('sf' = '${{ matrix.config.sf_args }}')) - install.packages(c("rcmdcheck", "withr")) + install.packages("rcmdcheck") shell: Rscript {0} - name: install dev version of dbplyr to test for upcoming 2.0 release @@ -94,11 +94,8 @@ jobs: - name: Check run: | - withr::with_options( - list(bcdata.cache_path = file.path(Sys.getenv("GITHUB_WORKSPACE"), "bcdata_cache")), - rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, - error_on = 'warning', check_dir = 'check') - ) + cat(paste0('options(bcdata.cache_path = "', file.path(Sys.getenv("GITHUB_WORKSPACE"), "bcdata_cache"), '")\n'), file = "~/.Rprofile", append = TRUE) + rcmdcheck::rcmdcheck(args = ${{ matrix.config.args }}, error_on = 'warning', check_dir = 'check') shell: Rscript {0} - name: Upload check results diff --git a/R/bcdc_options.R b/R/bcdc_options.R index 0946c942..896cb0d2 100644 --- a/R/bcdc_options.R +++ b/R/bcdc_options.R @@ -10,30 +10,49 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -#' Retrieve options used in bcdata, their value if set and the default value. +#' Retrieve options used in bcdata, their value if set and +#' the default value. #' -#' This function retrieves bcdata specific options that can be set. These options can be set -#' using `option({name of the option} = {value of the option})`. The default options are purposefully -#' set conservatively to hopefully ensure successful requests. Resetting these options may result in -#' failed calls to the data catalogue. 
Options in R are reset every time R is re-started. See examples for -#' addition ways to restore your initial state. +#' This function retrieves bcdata specific options that +#' can be set. These options can be set using +#' `option({name of the option} = {value of the option})`. +#' The default options are purposefully set conservatively +#' to hopefully ensure successful requests. Resetting +#' these options may result in failed calls to the data +#' catalogue. Options in R are reset every time R is +#' re-started. See examples for addition ways to restore +#' your initial state. #' -#' `bcdata.max_geom_pred_size` is the maximum size of an object used for a geometric operation. Objects -#' that are bigger than this value will have a bounding box drawn and apply the geometric operation -#' on that simpler polygon. Users can try to increase the maximum geometric predicate size and see -#' if the bcdata catalogue accepts their request. +#' `bcdata.max_geom_pred_size` is the maximum size of an +#' object used for a geometric operation. Objects that are +#' bigger than this value will have a bounding box drawn +#' and apply the geometric operation on that simpler +#' polygon. Users can try to increase the maximum +#' geometric predicate size and see if the bcdata +#' catalogue accepts their request. #' -#' `bcdata.chunk_limit` is an option useful when dealing with very large data sets. When requesting large objects -#' from the catalogue, the request is broken up into smaller chunks which are then recombined after they've -#' been downloaded. bcdata does this all for you but using this option you can set the size of the chunk -#' requested. On faster internet connections, a bigger chunk limit could be useful while on slower connections, -#' it is advisable to lower the chunk limit. Chunks must be less than 10000. +#' `bcdata.chunk_limit` is an option useful when dealing +#' with very large data sets. 
When requesting large +#' objects from the catalogue, the request is broken up +#' into smaller chunks which are then recombined after +#' they've been downloaded. bcdata does this all for you +#' but using this option you can set the size of the chunk +#' requested. On faster internet connections, a bigger +#' chunk limit could be useful while on slower +#' connections, it is advisable to lower the chunk limit. +#' Chunks must be less than 10000. #' -#' `bcdata.cache_path` is the location on your computer where results from web requests are cached. The default -#' is set by [rappdirs::user_cache_dir()] via [bcdc_cache_path()]. +#' `bcdata.cache_path` is the location on your computer +#' where results from web requests are cached. The default +#' is set by [rappdirs::user_cache_dir()] via +#' [bcdc_cache_path()]. This option can only be set before +#' the package is loaded (e.g., by setting it in your +#' .Rprofile file). #' -#' `bcdata.cache_timeout` is the time, in seconds, that the cache is maintained. Default is -#' 3600 seconds (one hour). +#' `bcdata.cache_timeout` is the time, in seconds, that +#' the cache is maintained. Default is 3600 seconds (one +#' hour). This option can only be set before the package +#' is loaded (e.g., by setting it in your .Rprofile file). 
#' #' @examples #' \donttest{ diff --git a/R/utils-classes.R b/R/utils-classes.R index b27b309f..07fda156 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -381,9 +381,16 @@ collect_bcdc_promise_ <- function(x, ...){ txt <- cc$parse("UTF-8") - as.bcdc_sf(bcdc_read_sf(txt), query_list = query_list, url = url, + ret <- as.bcdc_sf(bcdc_read_sf(txt), query_list = query_list, url = url, full_url = full_url) + if (getOption("bcdata.cache_verbose", FALSE)) { + message("caching for ", bcdc_cache_timeout(), + " seconds at ", bcdc_cache_path()) + } + + ret + } #' Retrieve Default Cache timeout diff --git a/man/bcdc_options.Rd b/man/bcdc_options.Rd index a250373a..ef37850c 100644 --- a/man/bcdc_options.Rd +++ b/man/bcdc_options.Rd @@ -2,34 +2,53 @@ % Please edit documentation in R/bcdc_options.R \name{bcdc_options} \alias{bcdc_options} -\title{Retrieve options used in bcdata, their value if set and the default value.} +\title{Retrieve options used in bcdata, their value if set and +the default value.} \usage{ bcdc_options() } \description{ -This function retrieves bcdata specific options that can be set. These options can be set -using \verb{option(\{name of the option\} = \{value of the option\})}. The default options are purposefully -set conservatively to hopefully ensure successful requests. Resetting these options may result in -failed calls to the data catalogue. Options in R are reset every time R is re-started. See examples for -addition ways to restore your initial state. +This function retrieves bcdata specific options that +can be set. These options can be set using +\verb{option(\{name of the option\} = \{value of the option\})}. +The default options are purposefully set conservatively +to hopefully ensure successful requests. Resetting +these options may result in failed calls to the data +catalogue. Options in R are reset every time R is +re-started. See examples for addition ways to restore +your initial state. 
} \details{ -\code{bcdata.max_geom_pred_size} is the maximum size of an object used for a geometric operation. Objects -that are bigger than this value will have a bounding box drawn and apply the geometric operation -on that simpler polygon. Users can try to increase the maximum geometric predicate size and see -if the bcdata catalogue accepts their request. +\code{bcdata.max_geom_pred_size} is the maximum size of an +object used for a geometric operation. Objects that are +bigger than this value will have a bounding box drawn +and apply the geometric operation on that simpler +polygon. Users can try to increase the maximum +geometric predicate size and see if the bcdata +catalogue accepts their request. -\code{bcdata.chunk_limit} is an option useful when dealing with very large data sets. When requesting large objects -from the catalogue, the request is broken up into smaller chunks which are then recombined after they've -been downloaded. bcdata does this all for you but using this option you can set the size of the chunk -requested. On faster internet connections, a bigger chunk limit could be useful while on slower connections, -it is advisable to lower the chunk limit. Chunks must be less than 10000. +\code{bcdata.chunk_limit} is an option useful when dealing +with very large data sets. When requesting large +objects from the catalogue, the request is broken up +into smaller chunks which are then recombined after +they've been downloaded. bcdata does this all for you +but using this option you can set the size of the chunk +requested. On faster internet connections, a bigger +chunk limit could be useful while on slower +connections, it is advisable to lower the chunk limit. +Chunks must be less than 10000. -\code{bcdata.cache_path} is the location on your computer where results from web requests are cached. The default -is set by \code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} via \code{\link[=bcdc_cache_path]{bcdc_cache_path()}}. 
+\code{bcdata.cache_path} is the location on your computer +where results from web requests are cached. The default +is set by \code{\link[rappdirs:user_cache_dir]{rappdirs::user_cache_dir()}} via +\code{\link[=bcdc_cache_path]{bcdc_cache_path()}}. This option can only be set before +the package is loaded (e.g., by setting it in your +.Rprofile file). -\code{bcdata.cache_timeout} is the time, in seconds, that the cache is maintained. Default is -3600 seconds (one hour). +\code{bcdata.cache_timeout} is the time, in seconds, that +the cache is maintained. Default is 3600 seconds (one +hour). This option can only be set before the package +is loaded (e.g., by setting it in your .Rprofile file). } \examples{ \donttest{