Skip to content

Commit

Permalink
Start making pull_US_data() to transition to hake-assessment
Browse files Browse the repository at this point in the history
  • Loading branch information
kellijohnson-NOAA committed Nov 12, 2024
1 parent e9b9a9b commit 5c913b6
Show file tree
Hide file tree
Showing 32 changed files with 547 additions and 280 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(process_catch_norpac)
export(process_database)
export(process_weight_at_age)
export(process_weight_at_age_survey)
export(pull_US_data)
export(pull_database)
export(queryDB)
export(update_ss3_catch)
Expand Down
48 changes: 19 additions & 29 deletions R/hakedata-.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ hakedata_wd <- function() {
)
}
stopifnot(fs::dir_exists(wd))
stopifnot(basename(wd) == "data-tables")
return(wd)
}

Expand Down Expand Up @@ -87,43 +88,32 @@ hakedata_year <- function() {
#' and stored or entered passwords for accessing databases that store
#' confidential information about landings of Pacific Hake.
#'
#' @param database A vector of character values specifying which databases you
#' want login information for.
#' @param file A file path specifying where to find the passwords.
#' The path can be full or relative to your current working directory.
#' If a path is provided, the file that it leads to
#' must be for a text file with one password per line for each database
#' in the `database` argument and in that order.
#' The default for `database` means that the file would have two lines,
#' where the first line is the NORPAC password and
#' the second line is the PacFIN password.
#' These passwords should not be surrounded with quotes.
#' If a file name is not provided, which is the default behaviour, then
#' the user will be prompted for their passwords. This also happens if
#' the file cannot be found given the path provided.
#' @inheritParams pull_US_data
#'
#' @return A list with two entries, `usernames` and `passwords`.
#' Each element will have the same number of entries as the
#' input argument `database` and be named using the elements of `database`.
#' The list is invisibly returned to ensure that the passwords are not printed
#' to the screen. Thus, the function call should be assigned to an object.
#' @return
#' A list with two entries, `usernames` and `passwords`. Each entry contain a
#' named vector with one element for each element in the input argument
#' `database`. The list is invisibly returned to ensure that the passwords are
#' not printed to the screen. Thus, the function call should be assigned to an
#' object.
#' @export
#' @author Kelli F. Johnson
#' @examples
#' \dontrun{
#' # Prompted for passwords for each database
#' test <- hakedata_sql_password()
#' # Prompted for passwords for each database because file is not found
#' test <- hakedata_sql_password(file = "doesnotwork.txt")
#' # Prompted for passwords for each database because password_file is not found
#' test <- hakedata_sql_password(password_file = "doesnotwork.txt")
#' # On Kelli Johnson's machine, the following will work
#' test <- hakedata_sql_password(file = "password.txt")
#' test <- hakedata_sql_password(password_file = "password.txt")
#' # Doesn't work because entry for database is not in the list
#' # of allowed databases, i.e., the default for `database`.
#' test <- hakedata_sql_password(database = "onedatabase")
#' # Only look for one password
#' test <- hakedata_sql_password(database = "NORPAC")
#' }
hakedata_sql_password <- function(database = c("NORPAC", "PacFIN"), file) {
hakedata_sql_password <- function(password_file,
database = c("NORPAC", "PacFIN")) {
user <- Sys.info()["user"]
database <- match.arg(database, several.ok = TRUE)
name <- switch(user,
Expand All @@ -141,14 +131,13 @@ hakedata_sql_password <- function(database = c("NORPAC", "PacFIN"), file) {
}
)
stopifnot(!is.null(name))
stopifnot(all(names(name) %in% database))

if (missing(file)) {
file <- NULL
} else {
if (!file.exists(file)) file <- NULL
if (missing(password_file) || !file.exists(password_file)) {
password_file <- NULL
}

if (is.null(file)) {
if (is.null(password_file)) {
passwords <- rep(NA, length(database))
for (ii in seq_along(database)) {
passwords[ii] <- readline(
Expand All @@ -160,7 +149,8 @@ hakedata_sql_password <- function(database = c("NORPAC", "PacFIN"), file) {
)
}
} else {
passwords <- readLines(file, warn = FALSE)
passwords <- readLines(password_file, warn = FALSE)
stopifnot(length(database) == length(passwords))
}

names(passwords) <- database
Expand Down
2 changes: 1 addition & 1 deletion R/process_age-.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' @author Kelli F. Johnson
#'
process_age_sea <- function(atsea.ages = get_local(file = "atsea.ages.Rdat"),
ncatch = get_local(file = "norpac_catch.Rdat"),
ncatch = get_local(file = "ncatch.Rdat"),
years = 2008:hakedata_year(),
ages = 1:15,
files = fs::path(
Expand Down
6 changes: 3 additions & 3 deletions R/process_catch-.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
#' * us-cp-catch-rate-by-month.csv
#' * us-ms-catch-rate-by-month.csv
#'
process_catch_norpac <- function(ncatch = get_local(file = "norpac_catch.Rdat"),
process_catch_norpac <- function(ncatch = get_local(file = "ncatch.Rdat"),
nyears = 5,
savedir = hakedata_wd()) {
# Setup the environment
Expand Down Expand Up @@ -247,14 +247,14 @@ process_catch_norpac <- function(ncatch = get_local(file = "norpac_catch.Rdat"),
#'
#' @template pcatch
#' @template nyears
#' @template savedir
#' @inheritParams process_weight_at_age_survey
#'
#' @return The following files are saved to the disk:
#' * us-shore-catch-by-month.csv
#' * us-research-catch-by-month.csv
#' * us-ti-catch-by-month.csv
#'
process_catch_pacfin <- function(pcatch = get_local(file = "pacfin_catch.Rdat"),
process_catch_pacfin <- function(pcatch = get_local(file = "pcatch.Rdat"),
nyears = 5,
savedir = hakedata_wd()) {
# FLEET XXX is in the hake assessment as shore-based catches,
Expand Down
4 changes: 4 additions & 0 deletions R/process_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,9 @@ process_database <- function() {
# Weight at age
process_weight_at_age_survey()
process_weight_at_age_us()
old <- process_weight_at_age(
max_year = hakedata_year() - 1,
output_wtatage_file_name = "wtatage_fix.ss"
)
withforecast <- process_weight_at_age()
}
54 changes: 30 additions & 24 deletions R/process_weight_at_age-.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,14 @@ process_weight_at_age_us <- function(savedir = hakedata_wd()) {
#' This will correspond to the maximum age group in the data, not in the
#' model because SS can model many ages when there is only information in
#' the data for a few ages.
#' @param yrs A vector of years to search for recent data. Typically,
#' the vector starts with 2008 and ends with the most recent year
#' of data. This will allow files created from `process_weight_at_age_US()` to
#' be included in the analysis, i.e., recent US data. Typically, you
#' should not have to change this value from the default entry.
#' @param navgyears The number of early and late years to average since
#' 1975 and \code{max(yrs)} for the early and late analysis asked for
#' by the Scientific Review Group in 2017. The argument can be a single
#' value or a vector of two values, where in the latter case the second
#' value will be used for the most recent time period.
#' @param max_year A four-digit integer specifying the maximum year of data
#' that you want to include in the weight-at-age data. The default is the
#' last year of data found using [hakedata_year()].
#' @param navgyears The number of early and late years to average since 1975 and
#' `max_year` for the early and late analysis asked for by the Scientific
#' Review Group in 2017. The argument can be a single value or a vector of two
#' values, where in the latter case the second value will be used for the most
#' recent time period.
#' @param nforecast The number of years to forecast into the future.
#' Typically, this is three for the hake assessment and will lead to
#' this many rows of mean weight-at-age data being copied to the data frame
Expand All @@ -153,10 +151,11 @@ process_weight_at_age_us <- function(savedir = hakedata_wd()) {
#'
process_weight_at_age <- function(dir = hakedata_wd(),
maxage = 15,
yrs = 2008:hakedata_year(),
max_year = hakedata_year(),
navgyears = 5,
nforecast = 4,
maturity = maturity_at_age) {
maturity = maturity_at_age,
output_wtatage_file_name = "wtatage.ss") {
fs::dir_create(path = file.path(dir, "plots"))

# length-weight-age_data.rds provided by CG on 2021-01-09 in google drive #703
Expand All @@ -171,13 +170,21 @@ process_weight_at_age <- function(dir = hakedata_wd(),
dat <- purrr::map_dfr(
files_weights,
.f = weight_at_age_read
) %>%
weight_at_age_outlier(filter = FALSE, drop = FALSE)

late <- (max(yrs) - navgyears + 1):(max(yrs))
) |>
# Fix the four--five weight units from PacFIN that are wrong
# TODO: remove this mutate when the data is fixed.
dplyr::mutate(
Weight_kg = ifelse(
(Source == "US_shore" & Weight_kg < 0.09 & Age_yrs > 4),
Weight_kg * 10,
Weight_kg
)
) |>
weight_at_age_outlier(filter = FALSE, drop = FALSE) |>
dplyr::filter(!outlier, Year <= max_year)

gg <- plot_weight_at_age(
data = dplyr::filter(dat, Age_yrs <= 10, outlier == FALSE),
data = dplyr::filter(dat, Age_yrs <= 10),
maxage = maxage
)
ggplot2::ggsave(
Expand All @@ -186,7 +193,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
filename = file.path(dir, "plots", "meanweightatage_source.png")
)
gg <- plot_weight_at_age(
data = dplyr::filter(dat, Age_yrs <= maxage, outlier == FALSE),
data = dplyr::filter(dat, Age_yrs <= maxage),
maxage = maxage
) +
ggplot2::facet_grid(cat ~ .)
Expand All @@ -197,7 +204,6 @@ process_weight_at_age <- function(dir = hakedata_wd(),

#### making input files for SS with the holes still present
# NULL months keeps the Poland data
dat <- dplyr::filter(dat, !outlier)
wtage_All <- weight_at_age_wide(dat)
wtage_All_wMean <- dplyr::bind_rows(
weight_at_age_wide(dat %>% dplyr::mutate(Year = -1940)),
Expand Down Expand Up @@ -243,7 +249,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
wtageInterp2_All <- fill_wtage_matrix(wtageInterp1_All)
wtageInterp2_All$Note <- fill_wtage_matrix(wtage_All)$Note

# write output combining all fleets closer to format used by SS
# write output combining all fleets closer to format used by SS3
wtage_All_wMean$Note <- c(paste("# Mean from ", min(dat$Year), "-", max(dat$Year), sep = ""), wtageInterp2_All$Note)
wtageInterp2_All <- rbind(wtage_All_wMean[1, ], wtageInterp2_All)

Expand All @@ -254,7 +260,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
counts = counts_All_wMean,
lengths = lenage_All_wMean,
dir = file.path(dir, "plots"),
year = max(yrs),
year = max_year,
maxage = maxage
)

Expand All @@ -273,7 +279,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
withforecast <- dplyr::bind_rows(
wtage_extended,
wtage_extended %>%
dplyr::filter(`#Yr` %in% late) %>%
dplyr::filter(`#Yr` %in% (max_year - navgyears + 1):(max_year)) %>%
dplyr::mutate(
dplyr::across(.cols = dplyr::starts_with("a"), mean),
`#Yr` = max(`#Yr`) + 1:NROW(.)
Expand All @@ -283,7 +289,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
)
)
write_wtatage_file(
file = fs::path(dirname(dir), "wtatage.ss"),
file = fs::path(dirname(dir), output_wtatage_file_name),
data = withforecast,
maturity = maturity
)
Expand All @@ -292,5 +298,5 @@ process_weight_at_age <- function(dir = hakedata_wd(),
file = fs::path(dir, "LWAdata.Rdata")
)

return(withforecast)
return(invisible(withforecast))
}
Loading

0 comments on commit 5c913b6

Please sign in to comment.