From 5c913b6319e2ddf6a48628e92b61331c04b4fae0 Mon Sep 17 00:00:00 2001
From: kellijohnson-NOAA <kelli.johnson@noaa.gov>
Date: Tue, 12 Nov 2024 14:35:03 -0800
Subject: [PATCH] Start making pull_US_data() to transition to hake-assessment

---
 NAMESPACE                                     |   1 +
 R/hakedata-.R                                 |  48 ++--
 R/process_age-.R                              |   2 +-
 R/process_catch-.R                            |   6 +-
 R/process_database.R                          |   4 +
 R/process_weight_at_age-.R                    |  54 ++--
 R/pull_US_data.R                              | 264 ++++++++++++++++++
 R/pull_database.R                             |   2 +-
 README.Rmd                                    |  68 -----
 README.md                                     |  47 ++--
 inst/extdata/sql/atSeaSquashTableAges.sql     |  24 --
 inst/extdata/sql/atsea_foreign_ages.sql       |  14 -
 inst/extdata/sql/nages_domestic.sql           |  20 ++
 inst/extdata/sql/nages_foreign.sql            |  18 ++
 .../sql/{norpac_catch.sql => ncatch.sql}      |   0
 .../sql/{atseaAgeWeight.sql => nweight.sql}   |   0
 inst/extdata/sql/pacfin.atseabysector.sql     |  13 -
 inst/extdata/sql/pacfin_comprehensive_bds.sql |   3 -
 .../sql/{pacfin_catch.sql => pcatch.sql}      |   5 +-
 .../sql/{pacfin_spec.sql => pspec.sql}        |   0
 man/hakedata_sql_password.Rd                  |  46 +--
 man/process_age_sea.Rd                        |   2 +-
 man/process_catch_norpac.Rd                   |   2 +-
 man/process_catch_pacfin.Rd                   |   6 +-
 man/process_weight_at_age.Rd                  |  23 +-
 man/pull_US_data.Rd                           | 118 ++++++++
 man/pull_database.Rd                          |   4 +-
 man/roxygen/templates/savedir.R               |   4 -
 man/roxygen/templates/species.R               |   1 -
 tests/testthat/setup-getdatabase.R            |   5 +-
 tests/testthat/teardown-getdatabase.R         |   3 +-
 tests/testthat/test-catchesequal.R            |  20 --
 32 files changed, 547 insertions(+), 280 deletions(-)
 create mode 100644 R/pull_US_data.R
 delete mode 100644 README.Rmd
 delete mode 100644 inst/extdata/sql/atSeaSquashTableAges.sql
 delete mode 100644 inst/extdata/sql/atsea_foreign_ages.sql
 create mode 100644 inst/extdata/sql/nages_domestic.sql
 create mode 100644 inst/extdata/sql/nages_foreign.sql
 rename inst/extdata/sql/{norpac_catch.sql => ncatch.sql} (100%)
 rename inst/extdata/sql/{atseaAgeWeight.sql => nweight.sql} (100%)
 delete mode 100644 inst/extdata/sql/pacfin.atseabysector.sql
 rename inst/extdata/sql/{pacfin_catch.sql => pcatch.sql} (83%)
 rename inst/extdata/sql/{pacfin_spec.sql => pspec.sql} (100%)
 create mode 100644 man/pull_US_data.Rd
 delete mode 100644 man/roxygen/templates/savedir.R

diff --git a/NAMESPACE b/NAMESPACE
index 45cbfa4..601a3f8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,7 @@ export(process_catch_norpac)
 export(process_database)
 export(process_weight_at_age)
 export(process_weight_at_age_survey)
+export(pull_US_data)
 export(pull_database)
 export(queryDB)
 export(update_ss3_catch)
diff --git a/R/hakedata-.R b/R/hakedata-.R
index 9583cb5..e0846c3 100644
--- a/R/hakedata-.R
+++ b/R/hakedata-.R
@@ -56,6 +56,7 @@ hakedata_wd <- function() {
     )
   }
   stopifnot(fs::dir_exists(wd))
+  stopifnot(basename(wd) == "data-tables")
   return(wd)
 }
 
@@ -87,43 +88,32 @@ hakedata_year <- function() {
 #' and stored or entered passwords for accessing databases that store
 #' confidential information about landings of Pacific Hake.
 #'
-#' @param database A vector of character values specifying which databases you
-#'   want login information for.
-#' @param file A file path specifying where to find the passwords.
-#'   The path can be full or relative to your current working directory.
-#'   If a path is provided, the file that it leads to
-#'   must be for a text file with one password per line for each database
-#'   in the `database` argument and in that order.
-#'   The default for `database` means that the file would have two lines,
-#'   where the first line is the NORPAC password and
-#'   the second line is the PacFIN password.
-#'   These passwords should not be surrounded with quotes.
-#'   If a file name is not provided, which is the default behaviour, then
-#'   the user will be prompted for their passwords. This also happens if
-#'   the file cannot be found given the path provided.
+#' @inheritParams pull_US_data
 #'
-#' @return A list with two entries, `usernames` and `passwords`.
-#' Each element will have the same number of entries as the
-#' input argument `database` and be named using the elements of `database`.
-#' The list is invisibly returned to ensure that the passwords are not printed
-#' to the screen. Thus, the function call should be assigned to an object.
+#' @return
+#' A list with two entries, `usernames` and `passwords`. Each entry contains a
+#' named vector with one element for each element in the input argument
+#' `database`. The list is invisibly returned to ensure that the passwords are
+#' not printed to the screen. Thus, the function call should be assigned to an
+#' object.
 #' @export
 #' @author Kelli F. Johnson
 #' @examples
 #' \dontrun{
 #' # Prompted for passwords for each database
 #' test <- hakedata_sql_password()
-#' # Prompted for passwords for each database because file is not found
-#' test <- hakedata_sql_password(file = "doesnotwork.txt")
+#' # Prompted for passwords for each database because password_file is not found
+#' test <- hakedata_sql_password(password_file = "doesnotwork.txt")
 #' # On Kelli Johnson's machine, the following will work
-#' test <- hakedata_sql_password(file = "password.txt")
+#' test <- hakedata_sql_password(password_file = "password.txt")
 #' # Doesn't work because entry for database is not in the list
 #' # of allowed databases, i.e., the default for `database`.
 #' test <- hakedata_sql_password(database = "onedatabase")
 #' # Only look for one password
 #' test <- hakedata_sql_password(database = "NORPAC")
 #' }
-hakedata_sql_password <- function(database = c("NORPAC", "PacFIN"), file) {
+hakedata_sql_password <- function(password_file,
+                                  database = c("NORPAC", "PacFIN")) {
   user <- Sys.info()["user"]
   database <- match.arg(database, several.ok = TRUE)
   name <- switch(user,
@@ -141,14 +131,13 @@ hakedata_sql_password <- function(database = c("NORPAC", "PacFIN"), file) {
     }
   )
   stopifnot(!is.null(name))
+  stopifnot(all(names(name) %in% database))
 
-  if (missing(file)) {
-    file <- NULL
-  } else {
-    if (!file.exists(file)) file <- NULL
+  if (missing(password_file) || !file.exists(password_file)) {
+    password_file <- NULL
   }
 
-  if (is.null(file)) {
+  if (is.null(password_file)) {
     passwords <- rep(NA, length(database))
     for (ii in seq_along(database)) {
       passwords[ii] <- readline(
@@ -160,7 +149,8 @@ hakedata_sql_password <- function(database = c("NORPAC", "PacFIN"), file) {
       )
     }
   } else {
-    passwords <- readLines(file, warn = FALSE)
+    passwords <- readLines(password_file, warn = FALSE)
+    stopifnot(length(database) == length(passwords))
   }
 
   names(passwords) <- database
diff --git a/R/process_age-.R b/R/process_age-.R
index 84ccc58..c66478e 100644
--- a/R/process_age-.R
+++ b/R/process_age-.R
@@ -18,7 +18,7 @@
 #' @author Kelli F. Johnson
 #'
 process_age_sea <- function(atsea.ages = get_local(file = "atsea.ages.Rdat"),
-                            ncatch = get_local(file = "norpac_catch.Rdat"),
+                            ncatch = get_local(file = "ncatch.Rdat"),
                             years = 2008:hakedata_year(),
                             ages = 1:15,
                             files = fs::path(
diff --git a/R/process_catch-.R b/R/process_catch-.R
index e33970e..fe60bf6 100644
--- a/R/process_catch-.R
+++ b/R/process_catch-.R
@@ -89,7 +89,7 @@
 #'   * us-cp-catch-rate-by-month.csv
 #'   * us-ms-catch-rate-by-month.csv
 #'
-process_catch_norpac <- function(ncatch = get_local(file = "norpac_catch.Rdat"),
+process_catch_norpac <- function(ncatch = get_local(file = "ncatch.Rdat"),
                                  nyears = 5,
                                  savedir = hakedata_wd()) {
   # Setup the environment
@@ -247,14 +247,14 @@ process_catch_norpac <- function(ncatch = get_local(file = "norpac_catch.Rdat"),
 #'
 #' @template pcatch
 #' @template nyears
-#' @template savedir
+#' @inheritParams process_weight_at_age_survey
 #'
 #' @return The following files are saved to the disk:
 #' * us-shore-catch-by-month.csv
 #' * us-research-catch-by-month.csv
 #' * us-ti-catch-by-month.csv
 #'
-process_catch_pacfin <- function(pcatch = get_local(file = "pacfin_catch.Rdat"),
+process_catch_pacfin <- function(pcatch = get_local(file = "pcatch.Rdat"),
                                  nyears = 5,
                                  savedir = hakedata_wd()) {
   # FLEET XXX is in the hake assessment as shore-based catches,
diff --git a/R/process_database.R b/R/process_database.R
index f3821a0..eea7720 100644
--- a/R/process_database.R
+++ b/R/process_database.R
@@ -15,5 +15,9 @@ process_database <- function() {
   # Weight at age
   process_weight_at_age_survey()
   process_weight_at_age_us()
+  old <- process_weight_at_age(
+    max_year = hakedata_year() - 1,
+    output_wtatage_file_name = "wtatage_fix.ss"
+  )
   withforecast <- process_weight_at_age()
 }
diff --git a/R/process_weight_at_age-.R b/R/process_weight_at_age-.R
index 400136d..17c0837 100644
--- a/R/process_weight_at_age-.R
+++ b/R/process_weight_at_age-.R
@@ -128,16 +128,14 @@ process_weight_at_age_us <- function(savedir = hakedata_wd()) {
 #' This will correspond to the maximum age group in the data, not in the
 #' model because SS can model many ages when there is only information in
 #' the data for a few ages.
-#' @param yrs A vector of years to search for recent data. Typically,
-#' the vector starts with 2008 and ends with the most recent year
-#' of data. This will allow files created from `process_weight_at_age_US()` to
-#' be included in the analysis, i.e., recent US data. Typically, you
-#' should not have to change this value from the default entry.
-#' @param navgyears The number of early and late years to average since
-#' 1975 and \code{max(yrs)} for the early and late analysis asked for
-#' by the Scientific Review Group in 2017. The argument can be a single
-#' value or a vector of two values, where in the latter case the second
-#' value will be used for the most recent time period.
+#' @param max_year A four-digit integer specifying the maximum year of data
+#'   that you want to include in the weight-at-age data. The default is the
+#'   last year of data found using [hakedata_year()].
+#' @param navgyears The number of early and late years to average since 1975 and
+#'   `max_year` for the early and late analysis asked for by the Scientific
+#'   Review Group in 2017. The argument can be a single value or a vector of two
+#'   values, where in the latter case the second value will be used for the most
+#'   recent time period.
 #' @param nforecast The number of years to forecast into the future.
 #' Typically, this is three for the hake assessment and will lead to
 #' this many rows of mean weight-at-age data being copied to the data frame
@@ -153,10 +151,11 @@ process_weight_at_age_us <- function(savedir = hakedata_wd()) {
 #'
 process_weight_at_age <- function(dir = hakedata_wd(),
                                   maxage = 15,
-                                  yrs = 2008:hakedata_year(),
+                                  max_year = hakedata_year(),
                                   navgyears = 5,
                                   nforecast = 4,
-                                  maturity = maturity_at_age) {
+                                  maturity = maturity_at_age,
+                                  output_wtatage_file_name = "wtatage.ss") {
   fs::dir_create(path = file.path(dir, "plots"))
 
   # length-weight-age_data.rds provided by CG on 2021-01-09 in google drive #703
@@ -171,13 +170,21 @@ process_weight_at_age <- function(dir = hakedata_wd(),
   dat <- purrr::map_dfr(
     files_weights,
     .f = weight_at_age_read
-  ) %>%
-    weight_at_age_outlier(filter = FALSE, drop = FALSE)
-
-  late <- (max(yrs) - navgyears + 1):(max(yrs))
+  ) |>
+    # Fix the four--five weight units from PacFIN that are wrong
+    # TODO: remove this mutate when the data is fixed.
+    dplyr::mutate(
+      Weight_kg = ifelse(
+        (Source == "US_shore" & Weight_kg < 0.09 & Age_yrs > 4),
+        Weight_kg * 10,
+        Weight_kg
+      )
+    ) |>
+    weight_at_age_outlier(filter = FALSE, drop = FALSE) |>
+    dplyr::filter(!outlier, Year <= max_year)
 
   gg <- plot_weight_at_age(
-    data = dplyr::filter(dat, Age_yrs <= 10, outlier == FALSE),
+    data = dplyr::filter(dat, Age_yrs <= 10),
     maxage = maxage
   )
   ggplot2::ggsave(
@@ -186,7 +193,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
     filename = file.path(dir, "plots", "meanweightatage_source.png")
   )
   gg <- plot_weight_at_age(
-    data = dplyr::filter(dat, Age_yrs <= maxage, outlier == FALSE),
+    data = dplyr::filter(dat, Age_yrs <= maxage),
     maxage = maxage
   ) +
     ggplot2::facet_grid(cat ~ .)
@@ -197,7 +204,6 @@ process_weight_at_age <- function(dir = hakedata_wd(),
 
   #### making input files for SS with the holes still present
   # NULL months keeps the Poland data
-  dat <- dplyr::filter(dat, !outlier)
   wtage_All <- weight_at_age_wide(dat)
   wtage_All_wMean <- dplyr::bind_rows(
     weight_at_age_wide(dat %>% dplyr::mutate(Year = -1940)),
@@ -243,7 +249,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
   wtageInterp2_All <- fill_wtage_matrix(wtageInterp1_All)
   wtageInterp2_All$Note <- fill_wtage_matrix(wtage_All)$Note
 
-  # write output combining all fleets closer to format used by SS
+  # write output combining all fleets closer to format used by SS3
   wtage_All_wMean$Note <- c(paste("# Mean from ", min(dat$Year), "-", max(dat$Year), sep = ""), wtageInterp2_All$Note)
   wtageInterp2_All <- rbind(wtage_All_wMean[1, ], wtageInterp2_All)
 
@@ -254,7 +260,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
     counts = counts_All_wMean,
     lengths = lenage_All_wMean,
     dir = file.path(dir, "plots"),
-    year = max(yrs),
+    year = max_year,
     maxage = maxage
   )
 
@@ -273,7 +279,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
   withforecast <- dplyr::bind_rows(
     wtage_extended,
     wtage_extended %>%
-      dplyr::filter(`#Yr` %in% late) %>%
+      dplyr::filter(`#Yr` %in% (max_year - navgyears + 1):(max_year)) %>%
       dplyr::mutate(
         dplyr::across(.cols = dplyr::starts_with("a"), mean),
         `#Yr` = max(`#Yr`) + 1:NROW(.)
@@ -283,7 +289,7 @@ process_weight_at_age <- function(dir = hakedata_wd(),
       )
   )
   write_wtatage_file(
-    file = fs::path(dirname(dir), "wtatage.ss"),
+    file = fs::path(dirname(dir), output_wtatage_file_name),
     data = withforecast,
     maturity = maturity
   )
@@ -292,5 +298,5 @@ process_weight_at_age <- function(dir = hakedata_wd(),
     file = fs::path(dir, "LWAdata.Rdata")
   )
 
-  return(withforecast)
+  return(invisible(withforecast))
 }
diff --git a/R/pull_US_data.R b/R/pull_US_data.R
new file mode 100644
index 0000000..bc7537a
--- /dev/null
+++ b/R/pull_US_data.R
@@ -0,0 +1,264 @@
+#' Pull data from U.S. databases for Pacific Hake
+#'
+#' Extraction of
+#' * catch data,
+#' * weight measurements,
+#' * length measurements,
+#' * age reads, and
+#' * management quantities
+#' from NORPAC and PacFIN databases for the Pacific Hake assessment. Other,
+#' additional, databases may be added in the future through the `database =`
+#' argument.
+#'
+#' @details
+#' `pull_US_data()` includes several steps, outlined below, and leads to many
+#' files being saved to your computer. Originally, files were **NOT**
+#' overwritten. In 2022, Kelli F. Johnson changed the code to not save previous
+#' downloads to save on disk space, reduce the storing of redundant information,
+#' and simplify the code base that needed maintenance.
+#' * Get the login information with [hakedata_sql_password()].
+#' * Create the "extractedData" directory inside of `save_dir`.
+#' * Extract data from NORPAC.
+#'   * Catch data
+#'   * Weight and age data
+#'   * Domestic ages from the squash table (that also includes lengths)
+#'   * Foreign ages
+#' * Extract data from PacFIN.
+#'   * Catch data
+#'   * Age, length, and weight data from bds table
+#'   * Management quantities such as allowable catch limits (ACL) for the
+#'     Pacific Fisheries Management Council, i.e., U.S. quantities of interest.
+#' * Save each object to the disk in the "extractedData" directory in
+#'   `save_dir`.
+#'
+#' @param password_file A file path specifying where to find the passwords. The
+#'   path can be full or relative to your current working directory. If a path
+#'   is provided, the file must be a text file with one password per line for
+#'   each database in the `database` argument and in that order. The default for
+#'   `database` means that the file has two lines, where the first line is the
+#'   NORPAC password and the second line is the PacFIN password. These passwords
+#'   should not be surrounded with quotes. If a file name is not provided, which
+#'   is the default behavior, then the user will be prompted for their
+#'   passwords. This also happens if the file cannot be found given the path
+#'   provided. These passwords are needed because the databases store
+#'   confidential information.
+#' @param database A vector of character strings indicating which databases you
+#'   want to pull information from. Options include `c("NORPAC", "PacFIN")`, one
+#'   or both (the default) can be specified. Note that case matters here.
+#' @param start_year An integer value specifying the start year of data to
+#'   include in the extraction. The default is 1900 to include all the data
+#'   possible, including years where only a foreign fleet fished.
+#' @param end_year An integer value specifying the final year of data to include
+#'   in the extraction. The default uses [hakedata_year()] to determine the
+#'   terminal year.
+#' @param save_dir A string specifying the full path to a directory where you
+#'   want files saved. The default is to save them in a folder found using
+#'   [hakedata_wd()], which returns a path specific to the personal preferences
+#'   of JTC members.
+#'
+#' @seealso
+#' * [hakedata_year()]
+#' * [hakedata_wd()]
+#' * [hakedata_sql_password()]
+#' @export
+#' @author Kelli F. Johnson
+#' @return
+#' An environment with the objects pulled from the desired databases is
+#' invisibly returned. Additionally, several `.Rdat` files are saved to the
+#' disk in a folder called `extractedData`, which is created if needed, inside
+#' of the directory passed to `save_dir`. The following files are saved:
+#' * nages.Rdat
+#' * nweight.Rdat
+#' * ncatch.Rdat
+#' * pcatch.Rdat
+#' * page.Rdat
+#' * pspec.Rdat
+#'
+#' @examples
+#' \dontrun{
+#' # An environment with objects is returned
+#' dataenv <- pull_US_data()
+#' # Access individual objects using `get()`
+#' head(get("ncatch", envir = dataenv))
+#' # Access individual objects pretending the environment is a list
+#' dataenv[["ncatch"]][1:5, ]
+#' }
+#'
+pull_US_data <- function(password_file,
+                         database = c("NORPAC", "PacFIN"),
+                         start_year = 1900,
+                         end_year = hakedata_year(),
+                         save_dir = hakedata_wd()) {
+  # Ensure that the database names are spelled appropriately with the right case
+  database <- match.arg(database, several.ok = TRUE)
+
+  # File management
+  sql_dir <- system.file("extdata", "sql", package = "hakedataUSA")
+  # NOTE(review): hakedata_sql_password() documents its entries as `usernames`
+  # and `passwords` but `username` and `password` are extracted below; `[[`
+  # matches names exactly, so confirm which names are actually returned.
+  info <- hakedata_sql_password(
+    password_file = password_file,
+    database = database
+  )
+  final_dir <- file.path(save_dir, "extractedData")
+  fs::dir_create(path = final_dir, recurse = TRUE)
+
+  # Set digits so that the full haul join number is displayed. options() returns
+  # the previous value of what it changes, so only `digits` is restored on exit.
+  oldoptions <- options(digits = 19)
+  on.exit(options(oldoptions), add = TRUE)
+
+  # Save `data` in final_dir as <object name>.Rdat, where the object stored in
+  # the file has the same name as the object that was passed to `data`
+  localsave <- function(data) {
+    x <- deparse(substitute(data))
+    assign(x, data)
+    save(list = x, file = file.path(final_dir, paste0(x, ".Rdat")))
+  }
+
+  # NORPAC
+  if ("NORPAC" %in% database) {
+    # Catches
+    ncatch <- queryDB(
+      queryFilename = fs::path(sql_dir, "ncatch.sql"),
+      db = "NORPAC",
+      uid = info[["username"]][["NORPAC"]],
+      pw = info[["password"]][["NORPAC"]],
+      start = start_year, end = end_year
+    )
+    localsave(ncatch)
+    # Age and weight data
+    # TODO: determine if nweight is used at all
+    nweight <- queryDB(
+      queryFilename = fs::path(sql_dir, "nweight.sql"),
+      db = "NORPAC",
+      uid = info[["username"]][["NORPAC"]],
+      pw = info[["password"]][["NORPAC"]],
+      sp = "206", start = start_year, end = end_year
+    )
+    localsave(nweight)
+    # Age and weight data from squash table
+    atsea.ages <- queryDB(
+      queryFilename = fs::path(sql_dir, "nages_domestic.sql"),
+      db = "NORPAC",
+      uid = info[["username"]][["NORPAC"]],
+      pw = info[["password"]][["NORPAC"]],
+      sp = "206",
+      start = start_year,
+      end = end_year
+    ) |>
+      dplyr::mutate(
+        Month = format(HAUL_OFFLOAD_DATE, "%m"),
+        Year = as.integer(format(HAUL_OFFLOAD_DATE, "%Y")),
+        .after = HAUL_JOIN
+      )
+    # Foreign ages, where YEAR is renamed to match Year in the domestic ages
+    atsea.foreign <- queryDB(
+      queryFilename = fs::path(sql_dir, "nages_foreign.sql"),
+      db = "NORPAC",
+      uid = info[["username"]][["NORPAC"]],
+      pw = info[["password"]][["NORPAC"]],
+      sp = "206",
+      start = start_year,
+      end = end_year
+    ) |>
+      dplyr::rename(Year = YEAR)
+    # Combine domestic and foreign ages using every foreign column as a key
+    nages <- dplyr::full_join(
+      x = atsea.ages,
+      y = atsea.foreign,
+      by = colnames(atsea.foreign)
+    )
+    rm(atsea.ages, atsea.foreign)
+    localsave(nages)
+  }
+
+  # PacFIN
+  if ("PacFIN" %in% database) {
+    # Catches
+    # Remove XXX fleet (foreign catch?)
+    pcatch <- queryDB(
+      queryFilename = fs::path(sql_dir, "pcatch.sql"),
+      db = "PACFIN",
+      uid = info[["username"]][["PacFIN"]],
+      pw = info[["password"]][["PacFIN"]],
+      sp = "PWHT",
+      start = start_year,
+      end = end_year
+    ) |>
+      dplyr::mutate(
+        Date = as.Date(LANDING_DATE),
+        month = f_date(Date, "%m"),
+        # Entries are LE, OA, R, TI, and XX where R is research
+        sector = ifelse(grepl("^R", FLEET), "USresearch", "USshore")
+      ) |>
+      dplyr::rename(year = YEAR) |>
+      dplyr::select(-LANDING_DATE) |>
+      dplyr::arrange(Date)
+    localsave(pcatch)
+    # bds data
+    page <- queryDB(
+      queryFilename = fs::path(sql_dir, "pacfin_comprehensive_bds.sql"),
+      db = "PACFIN",
+      uid = info[["username"]][["PacFIN"]],
+      pw = info[["password"]][["PacFIN"]],
+      sp = "PWHT",
+      start = start_year,
+      end = end_year
+    ) |>
+      dplyr::mutate(
+        # Convert weight to g
+        FISH_WEIGHT = FISH_WEIGHT * ifelse(
+          test = FISH_WEIGHT_UNITS %in% c("LBS", "P"),
+          yes = pound_to_gram,
+          no = 1
+        ),
+        # Weights of zero are treated as missing
+        FISH_WEIGHT = ifelse(FISH_WEIGHT == 0, NA, FISH_WEIGHT),
+        # Convert length to mm
+        FISH_LENGTH = FISH_LENGTH * ifelse(
+          test = FISH_LENGTH_UNITS %in% c("CM"),
+          yes = 10,
+          no = 1
+        ),
+        # Update the unit labels to match the converted values
+        FISH_WEIGHT_UNITS = ifelse(
+          test = FISH_WEIGHT_UNITS %in% c("LBS", "P"),
+          yes = "G",
+          no = FISH_WEIGHT_UNITS
+        ),
+        FISH_LENGTH_UNITS = ifelse(
+          test = FISH_LENGTH_UNITS == "CM",
+          yes = "MM",
+          no = FISH_LENGTH_UNITS
+        )
+      )
+    localsave(page)
+    # Specifications, presumably the management quantities listed in @details
+    pspec <- queryDB(
+      queryFilename = fs::path(sql_dir, "pspec.sql"),
+      db = "PACFIN",
+      uid = info[["username"]][["PacFIN"]],
+      pw = info[["password"]][["PacFIN"]],
+      sp = "PWHT",
+      start = start_year,
+      end = end_year
+    )
+    # Keep the first row for each year unless only one column was returned
+    if (NCOL(pspec) != 1) {
+      pspec <- pspec[!duplicated(pspec[, "YEAR"]), ]
+    }
+    localsave(pspec)
+  }
+
+  # Collect what was pulled in the environment that is invisibly returned
+  e1 <- new.env()
+  if ("NORPAC" %in% database) {
+    list2env(list(nages = nages, nweight = nweight, ncatch = ncatch), e1)
+  }
+  if ("PacFIN" %in% database) {
+    list2env(list(pcatch = pcatch, page = page, pspec = pspec), e1)
+  }
+  invisible(e1)
+}
diff --git a/R/pull_database.R b/R/pull_database.R
index 9ce57e7..24ae2bd 100644
--- a/R/pull_database.R
+++ b/R/pull_database.R
@@ -58,7 +58,7 @@
 #'   If this argument is `NULL`, users will be prompted
 #'   for their passwords.
 #'   Passwords are needed because the databases store confidential data.
-#' @template savedir
+#' @inheritParams process_weight_at_age_survey
 #'
 #' @seealso
 #' * [hakedata_year()]
diff --git a/README.Rmd b/README.Rmd
deleted file mode 100644
index 79423e8..0000000
--- a/README.Rmd
+++ /dev/null
@@ -1,68 +0,0 @@
----
-output: github_document
----
-
-<!-- README.md is generated from README.Rmd. Please edit that file -->
-
-```{r, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>",
-  fig.path = "man/figures/README-",
-  out.width = "100%",
-  eval = FALSE
-)
-```
-
-# hakedataUSA
-
-<!-- badges: start -->
-<!-- badges: end -->
-
-The goal of {hakedataUSA} is to provide code to extract and workup the U.S. data
-for the assessment of Pacific Hake.
-
-## Instructions
-
-1. First, you must update `data-raw/quotas.csv` to include the sector-specific
-   quotas. These values are used when processing the data, mainly for the
-   creation of figures. Then, from within R, source `data-raw/quotas.R` and the
-   internal data object will be updated and ready for use. Commit both
-  `data-raw/quotas.csv` and `data-quotas.rda` to the repository and push.
-1. Next, load the package. This can be accomplished through GitHub (first
-   chunk) or using a local clone (second chunk).
-   ```{r installation-github}
-chooseCRANmirror(ind = 1)
-# install.packages("pak")
-pak::pak("pacific-hake/hakedataUSA")
-library(hakedataUSA)
-   ```
-   ```{r installation-local}
-chooseCRANmirror(ind = 1)
-stopifnot(basename(getwd()) == "hakedataUSA")
-devtools::load_all()
-   ```
-1. The path to where all of the raw output will be saved is stored in an 
-   internal function, i.e., `hakedata_wd()`. Try it out, see if it works for 
-   you. If it does not work, then you will need to alter the function, which is 
-   stored in `R/hakedata-R`. The function should result in a path ending with 
-   `data-tables` inside of your cloned version of
-   [pacific-hake/hake-assessment][hake-assessment].
-1. The remainder of the code will pull from the data bases and set up the input
-   files.
-```{r execute}
-pull_database()
-process_database()
-
-write_bridging(
-  dir_input = fs::path(dirname(hakedata_wd()), "models", "2022.01.10_base"),
-  dir_output = fs::path(dirname(hakedata_wd()), "models", "2023", "01-version", "02-bridging-models")
-)
-```
-
-## Issues
-
-Please contact kelli.johnson@noaa.gov if there are issues with the code.
-Note that the databases will only be accessible to U.S. members of the JTC.
-
-[hake-assessment]: www.github.com/pacific-hake/hake-assessment
diff --git a/README.md b/README.md
index 39ec4f7..25871df 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,7 @@
-
-<!-- README.md is generated from README.Rmd. Please edit that file -->
-
 # hakedataUSA
 
 <!-- badges: start -->
+
 <!-- badges: end -->
 
 The goal of {hakedataUSA} is to provide code to extract and workup the
@@ -20,43 +18,48 @@ U.S. data for the assessment of Pacific Hake.
 
 2.  Next, load the package. This can be accomplished through GitHub
     (first chunk) or using a local clone (second chunk).
-
+    
     ``` r
     chooseCRANmirror(ind = 1)
     # install.packages("pak")
     pak::pak("pacific-hake/hakedataUSA")
     library(hakedataUSA)
     ```
-
+    
     ``` r
     chooseCRANmirror(ind = 1)
     stopifnot(basename(getwd()) == "hakedataUSA")
     devtools::load_all()
     ```
 
-3.  The path to where all of the raw output will be saved is stored in
-    an internal function, i.e., `hakedata_wd()`. Try it out, see if it
+3.  The path to where the raw output will be saved is stored in an
+    internal function, i.e., `hakedata_wd()`. Try it out, see if it
     works for you. If it does not work, then you will need to alter the
     function, which is stored in `R/hakedata-R`. The function should
     result in a path ending with `data-tables` inside of your cloned
     version of
     [pacific-hake/hake-assessment](www.github.com/pacific-hake/hake-assessment).
 
-4.  The remainder of the code will pull from the data bases and set up
-    the input files.
+4.  Check that the correct year will be pulled for the data of interest
+    by running `hakedata_year()`. This will be the last year of data.
 
-``` r
-pull_database()
-process_database()
-
-write_bridging(
-  dir_input = fs::path(dirname(hakedata_wd()), "models", "2022.01.10_base"),
-  dir_output = fs::path(dirname(hakedata_wd()), "models", "2023", "01-version", "02-bridging-models")
-)
-```
+5.  To pull the data for the U.S.A., run the following code:
+    
+    ``` r
+    pull_US_data()
+    ```
 
-## Issues
+6.  To process the recently pulled data, run the following code:
+    
+    ``` r
+    process_database()
+    
+    write_bridging(
+      dir_input = fs::path(dirname(hakedata_wd()), "models", "2022.01.10_base"),
+      dir_output = fs::path(dirname(hakedata_wd()), "models", "2023", "01-version", "02-bridging-models")
+    )
+    ```
 
-Please contact <kelli.johnson@noaa.gov> if there are issues with the
-code. Note that the databases will only be accessible to U.S. members of
-the JTC.
+7.  This process must be augmented if it is a survey year to get the
+    survey data from the server and process it accordingly. More
+    information on that process is to come later.
diff --git a/inst/extdata/sql/atSeaSquashTableAges.sql b/inst/extdata/sql/atSeaSquashTableAges.sql
deleted file mode 100644
index 649fb81..0000000
--- a/inst/extdata/sql/atSeaSquashTableAges.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-SELECT *
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.CRUISE,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.PERMIT,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.LATDD_START,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.LONDD_START,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.LATDD_END,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.LONDD_END,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.FISHING_DEPTH_FATHOMS,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.BOTTOM_DEPTH_FATHOMS,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.SPECIES,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.SEX,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.LENGTH,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.WEIGHT,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.SPECIMEN_NUMBER,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.BARCODE,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.AGE,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.TYPE_1_OTOLITH,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.TYPE_3_SEX_LENGTH_WEIGHT,
--- DEBRIEFED_AGE_SQUASH_SP_TYPE.YEAR
-FROM DEBRIEFED_AGE_SQUASH_SP_TYPE
-WHERE DEBRIEFED_AGE_SQUASH_SP_TYPE.SPECIES = &sp
-AND DEBRIEFED_AGE_SQUASH_SP_TYPE.YEAR >= &beginyr
-AND DEBRIEFED_AGE_SQUASH_SP_TYPE.YEAR <= &endyr
-;
diff --git a/inst/extdata/sql/atsea_foreign_ages.sql b/inst/extdata/sql/atsea_foreign_ages.sql
deleted file mode 100644
index c303eb7..0000000
--- a/inst/extdata/sql/atsea_foreign_ages.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-SELECT FOREIGN_HAUL.*,
-  FOREIGN_AGE.AGE,
-  FOREIGN_AGE.SPECIMEN_NUMBER,
-  FOREIGN_AGE.SEX,
-  FOREIGN_AGE.LENGTH,
-  FOREIGN_AGE.INDIV_WEIGHT,
-  FOREIGN_AGE.MATURITY_CODE
-FROM FOREIGN_HAUL
-INNER JOIN FOREIGN_AGE
-ON FOREIGN_HAUL.HAUL_JOIN = FOREIGN_AGE.HAUL_JOIN
-WHERE 
-  FOREIGN_HAUL.LATITUDE < 4900 AND
-  FOREIGN_AGE.AGE IS NOT NULL AND
-  FOREIGN_AGE.SPECIES = 206;
diff --git a/inst/extdata/sql/nages_domestic.sql b/inst/extdata/sql/nages_domestic.sql
new file mode 100644
index 0000000..95828b0
--- /dev/null
+++ b/inst/extdata/sql/nages_domestic.sql
@@ -0,0 +1,20 @@
+SELECT
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.HAUL_JOIN,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.AGE,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.LENGTH,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.WEIGHT,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.SEX,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.CRUISE,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.CDQ_CODE,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.BOTTOM_DEPTH_FATHOMS,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.FISHING_DEPTH_FATHOMS,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.HAUL_OFFLOAD,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.HAUL_OFFLOAD_DATE,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.LATDD_START,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.LATDD_END,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.LONDD_START,
+  DEBRIEFED_AGE_SQUASH_SP_TYPE.LONDD_END
+FROM DEBRIEFED_AGE_SQUASH_SP_TYPE
+WHERE DEBRIEFED_AGE_SQUASH_SP_TYPE.SPECIES = &sp
+AND DEBRIEFED_AGE_SQUASH_SP_TYPE.YEAR >= &beginyr
+AND DEBRIEFED_AGE_SQUASH_SP_TYPE.YEAR <= &endyr;
diff --git a/inst/extdata/sql/nages_foreign.sql b/inst/extdata/sql/nages_foreign.sql
new file mode 100644
index 0000000..f88b8dc
--- /dev/null
+++ b/inst/extdata/sql/nages_foreign.sql
@@ -0,0 +1,18 @@
+SELECT 
+  FOREIGN_HAUL.HAUL_JOIN,
+  FOREIGN_HAUL.CRUISE,
+  FOREIGN_HAUL.YEAR,
+  FOREIGN_HAUL.LATITUDE LATDD_END,
+  FOREIGN_HAUL.LONGITUDE LONDD_END,
+  FOREIGN_HAUL.BOTTOM_DEPTH BOTTOM_DEPTH_FATHOMS,
+  FOREIGN_AGE.AGE,
+  FOREIGN_AGE.LENGTH,
+  FOREIGN_AGE.INDIV_WEIGHT WEIGHT,
+  FOREIGN_AGE.SEX
+FROM FOREIGN_HAUL
+INNER JOIN FOREIGN_AGE
+ON FOREIGN_HAUL.HAUL_JOIN = FOREIGN_AGE.HAUL_JOIN
+WHERE 
+  FOREIGN_HAUL.LATITUDE < 4900 AND
+  -- FOREIGN_AGE.AGE IS NOT NULL AND
+  FOREIGN_AGE.SPECIES = 206;
diff --git a/inst/extdata/sql/norpac_catch.sql b/inst/extdata/sql/ncatch.sql
similarity index 100%
rename from inst/extdata/sql/norpac_catch.sql
rename to inst/extdata/sql/ncatch.sql
diff --git a/inst/extdata/sql/atseaAgeWeight.sql b/inst/extdata/sql/nweight.sql
similarity index 100%
rename from inst/extdata/sql/atseaAgeWeight.sql
rename to inst/extdata/sql/nweight.sql
diff --git a/inst/extdata/sql/pacfin.atseabysector.sql b/inst/extdata/sql/pacfin.atseabysector.sql
deleted file mode 100644
index 5e927de..0000000
--- a/inst/extdata/sql/pacfin.atseabysector.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-SELECT 
-         LANDING_YEAR as Year,
-         LANDING_MONTH as Month,
-         NORPAC_SPECIES_CODE,
-         SECTOR,
-         SUM(CATCH_WEIGHT_MTONS) as mt
-FROM     PACFIN_MARTS.Comprehensive_npac
-WHERE
-         LANDING_YEAR >= &beginyr AND
-         LANDING_YEAR <= &endyr AND
-         NORPAC_SPECIES_CODE = upper(&sp)
-GROUP BY LANDING_YEAR, LANDING_MONTH, NORPAC_SPECIES_CODE, SECTOR
-ORDER BY SECTOR, LANDING_YEAR, LANDING_MONTH;
diff --git a/inst/extdata/sql/pacfin_comprehensive_bds.sql b/inst/extdata/sql/pacfin_comprehensive_bds.sql
index 628fc7e..c9ea0d3 100644
--- a/inst/extdata/sql/pacfin_comprehensive_bds.sql
+++ b/inst/extdata/sql/pacfin_comprehensive_bds.sql
@@ -1,5 +1,4 @@
 select
-   PACFIN_SPECIES_CODE spid,
    sample_year,
    sample_month,
    sample_agency,
@@ -14,7 +13,6 @@ select
    AGENCY_GEAR_CODE gear,
    PACFIN_GEAR_CODE grid,
    data_type,
-   PACFIN_CONDITION_CODE cond,
    FISH_LENGTH,
    FISH_LENGTH_UNITS,
    FISH_MATURITY_CODE maturity,
@@ -27,7 +25,6 @@ select
    NUMBER_OF_FEMALES females_num,
    WEIGHT_OF_LANDING_LBS total_wgt,
    EXPANDED_SAMPLE_WEIGHT exp_wt,
-   AGE_METHOD_CODE age_method,
    FINAL_FISH_AGE_IN_YEARS AGE,
    OBSERVED_FREQUENCY FREQ
 FROM pacfin_marts.COMPREHENSIVE_BDS_COMM
diff --git a/inst/extdata/sql/pacfin_catch.sql b/inst/extdata/sql/pcatch.sql
similarity index 83%
rename from inst/extdata/sql/pacfin_catch.sql
rename to inst/extdata/sql/pcatch.sql
index 8185efd..65bf4fc 100644
--- a/inst/extdata/sql/pacfin_catch.sql
+++ b/inst/extdata/sql/pcatch.sql
@@ -2,10 +2,7 @@ REM Get pacfin catch from PACFIN_MARTS.COMPREHENSIVE_FT table, which includes re
 SELECT   cft.PACFIN_YEAR YEAR,
          cft.FLEET_CODE FLEET,
          cft.PACFIN_GEAR_CODE GRID,
-         cft.LANDING_DATE TDATE,
-         cft.PACFIN_PORT_CODE PCID,
-         cft.DEALER_NUM PROC,
-         cft.DAHL_GROUNDFISH_CODE DAHL_SECTOR,
+         cft.LANDING_DATE,
          SUM(landed_weight_lbs) AS LBS,
          SUM(landed_weight_mtons) as MT,
          SUM(round_weight_mtons) as rMT
diff --git a/inst/extdata/sql/pacfin_spec.sql b/inst/extdata/sql/pspec.sql
similarity index 100%
rename from inst/extdata/sql/pacfin_spec.sql
rename to inst/extdata/sql/pspec.sql
diff --git a/man/hakedata_sql_password.Rd b/man/hakedata_sql_password.Rd
index c729b98..ce64ec7 100644
--- a/man/hakedata_sql_password.Rd
+++ b/man/hakedata_sql_password.Rd
@@ -4,31 +4,31 @@
 \alias{hakedata_sql_password}
 \title{Find username and passwords for databases}
 \usage{
-hakedata_sql_password(database = c("NORPAC", "PacFIN"), file)
+hakedata_sql_password(password_file, database = c("NORPAC", "PacFIN"))
 }
 \arguments{
-\item{database}{A vector of character values specifying which databases you
-want login information for.}
+\item{password_file}{A file path specifying where to find the passwords. The
+path can be full or relative to your current working directory. If a path
+is provided, the file must be a text file with one password per line for
+each database in the \code{database} argument and in that order. The default for
+\code{database} means that the file has two lines, where the first line is the
+NORPAC password and the second line is the PacFIN password. These passwords
+should not be surrounded with quotes. If a file name is not provided, which
+is the default behavior, then the user will be prompted for their
+passwords. This also happens if the file cannot be found given the path
+provided. These passwords are needed because the databases store
+confidential information.}
 
-\item{file}{A file path specifying where to find the passwords.
-The path can be full or relative to your current working directory.
-If a path is provided, the file that it leads to
-must be for a text file with one password per line for each database
-in the \code{database} argument and in that order.
-The default for \code{database} means that the file would have two lines,
-where the first line is the NORPAC password and
-the second line is the PacFIN password.
-These passwords should not be surrounded with quotes.
-If a file name is not provided, which is the default behaviour, then
-the user will be prompted for their passwords. This also happens if
-the file cannot be found given the path provided.}
+\item{database}{A vector of character strings indicating which databases you
+want to pull information from. Options include \code{c("NORPAC", "PacFIN")}; one
+or both (the default) can be specified. Note that case matters here.}
 }
 \value{
-A list with two entries, \code{usernames} and \code{passwords}.
-Each element will have the same number of entries as the
-input argument \code{database} and be named using the elements of \code{database}.
-The list is invisibly returned to ensure that the passwords are not printed
-to the screen. Thus, the function call should be assigned to an object.
+A list with two entries, \code{usernames} and \code{passwords}. Each entry contains a
+named vector with one element for each element in the input argument
+\code{database}. The list is invisibly returned to ensure that the passwords are
+not printed to the screen. Thus, the function call should be assigned to an
+object.
 }
 \description{
 Find the username and passwords specific given the username of the computer
@@ -39,10 +39,10 @@ confidential information about landings of Pacific Hake.
 \dontrun{
 # Prompted for passwords for each database
 test <- hakedata_sql_password()
-# Prompted for passwords for each database because file is not found
-test <- hakedata_sql_password(file = "doesnotwork.txt")
+# Prompted for passwords for each database because password_file is not found
+test <- hakedata_sql_password(password_file = "doesnotwork.txt")
 # On Kelli Johnson's machine, the following will work
-test <- hakedata_sql_password(file = "password.txt")
+test <- hakedata_sql_password(password_file = "password.txt")
 # Doesn't work because entry for database is not in the list
 # of allowed databases, i.e., the default for `database`.
 test <- hakedata_sql_password(database = "onedatabase")
diff --git a/man/process_age_sea.Rd b/man/process_age_sea.Rd
index 851c5e2..66c8842 100644
--- a/man/process_age_sea.Rd
+++ b/man/process_age_sea.Rd
@@ -6,7 +6,7 @@
 \usage{
 process_age_sea(
   atsea.ages = get_local(file = "atsea.ages.Rdat"),
-  ncatch = get_local(file = "norpac_catch.Rdat"),
+  ncatch = get_local(file = "ncatch.Rdat"),
   years = 2008:hakedata_year(),
   ages = 1:15,
   files = fs::path(hakedata_wd(), paste0("us-", c("cp", "ms"), "-age-proportions.csv")),
diff --git a/man/process_catch_norpac.Rd b/man/process_catch_norpac.Rd
index 8d71a46..0e573b8 100644
--- a/man/process_catch_norpac.Rd
+++ b/man/process_catch_norpac.Rd
@@ -5,7 +5,7 @@
 \title{Process NORPAC catches}
 \usage{
 process_catch_norpac(
-  ncatch = get_local(file = "norpac_catch.Rdat"),
+  ncatch = get_local(file = "ncatch.Rdat"),
   nyears = 5,
   savedir = hakedata_wd()
 )
diff --git a/man/process_catch_pacfin.Rd b/man/process_catch_pacfin.Rd
index 0d72801..45f0016 100644
--- a/man/process_catch_pacfin.Rd
+++ b/man/process_catch_pacfin.Rd
@@ -5,7 +5,7 @@
 \title{Print summaries and figures for catches from PacFIN}
 \usage{
 process_catch_pacfin(
-  pcatch = get_local(file = "pacfin_catch.Rdat"),
+  pcatch = get_local(file = "pcatch.Rdat"),
   nyears = 5,
   savedir = hakedata_wd()
 )
@@ -19,9 +19,7 @@ Otherwise, the R object is typically stored in the \code{hakedata} environment.}
 where the most recent year will be sequenced back in time \code{nyears}.
 The default is typically five years.}
 
-\item{savedir}{A full path to a directory where you want files saved.
-The default is to save them in a folder found using \code{\link[=hakedata_wd]{hakedata_wd()}},
-which returns a path specific to the personal preferences of JTC members.}
+\item{savedir}{A string specifying the path of interest.}
 }
 \value{
 The following files are saved to the disk:
diff --git a/man/process_weight_at_age.Rd b/man/process_weight_at_age.Rd
index 3496f3e..2ebc6ec 100644
--- a/man/process_weight_at_age.Rd
+++ b/man/process_weight_at_age.Rd
@@ -7,10 +7,11 @@
 process_weight_at_age(
   dir = hakedata_wd(),
   maxage = 15,
-  yrs = 2008:hakedata_year(),
+  max_year = hakedata_year(),
   navgyears = 5,
   nforecast = 4,
-  maturity = maturity_at_age
+  maturity = maturity_at_age,
+  output_wtatage_file_name = "wtatage.ss"
 )
 }
 \arguments{
@@ -24,17 +25,15 @@ This will correspond to the maximum age group in the data, not in the
 model because SS can model many ages when there is only information in
 the data for a few ages.}
 
-\item{yrs}{A vector of years to search for recent data. Typically,
-the vector starts with 2008 and ends with the most recent year
-of data. This will allow files created from \code{process_weight_at_age_US()} to
-be included in the analysis, i.e., recent US data. Typically, you
-should not have to change this value from the default entry.}
+\item{max_year}{A four-digit integer specifying the maximum year of data
+that you want to include in the weight-at-age data. The default is the
+last year of data found using \code{\link[=hakedata_year]{hakedata_year()}}.}
 
-\item{navgyears}{The number of early and late years to average since
-1975 and \code{max(yrs)} for the early and late analysis asked for
-by the Scientific Review Group in 2017. The argument can be a single
-value or a vector of two values, where in the latter case the second
-value will be used for the most recent time period.}
+\item{navgyears}{The number of early and late years to average since 1975 and
+\code{max_year} for the early and late analysis asked for by the Scientific
+Review Group in 2017. The argument can be a single value or a vector of two
+values, where in the latter case the second value will be used for the most
+recent time period.}
 
 \item{nforecast}{The number of years to forecast into the future.
 Typically, this is three for the hake assessment and will lead to
diff --git a/man/pull_US_data.Rd b/man/pull_US_data.Rd
new file mode 100644
index 0000000..25d0260
--- /dev/null
+++ b/man/pull_US_data.Rd
@@ -0,0 +1,118 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pull_US_data.R
+\name{pull_US_data}
+\alias{pull_US_data}
+\title{Pull data from U.S. databases for Pacific Hake}
+\usage{
+pull_US_data(
+  password_file,
+  database = c("NORPAC", "PacFIN"),
+  start_year = 1900,
+  end_year = hakedata_year(),
+  save_dir = hakedata_wd()
+)
+}
+\arguments{
+\item{password_file}{A file path specifying where to find the passwords. The
+path can be full or relative to your current working directory. If a path
+is provided, the file must be a text file with one password per line for
+each database in the \code{database} argument and in that order. The default for
+\code{database} means that the file has two lines, where the first line is the
+NORPAC password and the second line is the PacFIN password. These passwords
+should not be surrounded with quotes. If a file name is not provided, which
+is the default behavior, then the user will be prompted for their
+passwords. This also happens if the file cannot be found given the path
+provided. These passwords are needed because the databases store
+confidential information.}
+
+\item{database}{A vector of character strings indicating which databases you
+want to pull information from. Options include \code{c("NORPAC", "PacFIN")}; one
+or both (the default) can be specified. Note that case matters here.}
+
+\item{start_year}{An integer value specifying the start year of data to
+include in the extraction. The default is 1900 to include all the data
+possible, including years where only a foreign fleet fished.}
+
+\item{end_year}{An integer value specifying the final year of data to include
+in the extraction. The default uses \code{\link[=hakedata_year]{hakedata_year()}} to determine the
+terminal year.}
+
+\item{save_dir}{A string specifying the full path to a directory where you
+want files saved. The default is to save them in a folder found using
+\code{\link[=hakedata_wd]{hakedata_wd()}}, which returns a path specific to the personal preferences
+of JTC members.}
+}
+\value{
+An environment or \code{list} with several objects pulled from the desired
+databases is invisibly returned. Additionally, several \code{.Rdat} files are
+saved to the disk in a newly created folder called \code{extractedData} inside of
+the directory passed to \code{save_dir}. The following files are saved:
+\itemize{
+\item nages.Rdat
+\item nweight.Rdat
+\item ncatch.Rdat
+\item pcatch.Rdat
+\item page.Rdat
+}
+}
+\description{
+Extraction of
+\itemize{
+\item catch data,
+\item weight measurements,
+\item length measurements,
+\item age reads, and
+\item management quantities
+}
+from NORPAC and PacFIN databases for the Pacific Hake assessment. Other,
+additional, databases may be added in the future through the \verb{database =}
+argument.
+}
+\details{
+\code{pull_US_data()} includes several steps, outlined below, and leads to many
+files being saved to your computer. Originally, files were \strong{NOT}
+overwritten. In 2022, Kelli F. Johnson changed the code to not save previous
+downloads to save on disk space, reduce the storing of redundant information,
+and simplify the code base that needed maintenance.
+\itemize{
+\item Find the folder where the data should be saved with \code{\link[=hakedata_wd]{hakedata_wd()}}.
+\item Extract data from NORPAC.
+\itemize{
+\item Catch data
+\item Weight and age data
+\item Squash table of ages (that also includes lengths)
+\item Foreign ages
+\item Species list
+}
+\item Extract data from PacFIN.
+\itemize{
+\item Catch data
+\item Age, length, and weight data from bds table
+\item At-sea data
+\item Management quantities such as allowable catch limits (ACL) for the
+Pacific Fisheries Management Council, i.e., U.S. quantities of interest.
+}
+\item Save each object to the disk in the "extractedData" directory in
+\code{\link[=hakedata_wd]{hakedata_wd()}}.
+}
+}
+\examples{
+\dontrun{
+# An environment with objects is returned
+dataenv <- pull_US_data()
+# Access individual objects using `get()`
+head(get("ncatch", envir = dataenv))
+# Access individual objects pretending the environment is a list
+dataenv[["ncatch"]][1:5, ]
+}
+
+}
+\seealso{
+\itemize{
+\item \code{\link[=hakedata_year]{hakedata_year()}}
+\item \code{\link[=hakedata_wd]{hakedata_wd()}}
+}
+}
+\author{
+Kelli F. Johnson
+}
diff --git a/man/pull_database.Rd b/man/pull_database.Rd
index 06f4c07..7d5ed5a 100644
--- a/man/pull_database.Rd
+++ b/man/pull_database.Rd
@@ -42,9 +42,7 @@ If this argument is \code{NULL}, users will be prompted
 for their passwords.
 Passwords are needed because the databases store confidential data.}
 
-\item{savedir}{A full path to a directory where you want files saved.
-The default is to save them in a folder found using \code{\link[=hakedata_wd]{hakedata_wd()}},
-which returns a path specific to the personal preferences of JTC members.}
+\item{savedir}{A string specifying the path of interest.}
 }
 \value{
 An environment or \code{list} with several objects pulled from the
diff --git a/man/roxygen/templates/savedir.R b/man/roxygen/templates/savedir.R
deleted file mode 100644
index 8b86439..0000000
--- a/man/roxygen/templates/savedir.R
+++ /dev/null
@@ -1,4 +0,0 @@
-
-#' @param savedir A full path to a directory where you want files saved.
-#'   The default is to save them in a folder found using [hakedata_wd()],
-#'   which returns a path specific to the personal preferences of JTC members.
diff --git a/man/roxygen/templates/species.R b/man/roxygen/templates/species.R
index e06f850..da42b39 100644
--- a/man/roxygen/templates/species.R
+++ b/man/roxygen/templates/species.R
@@ -5,4 +5,3 @@
 #' value is often called the `OBS` species code in observer landings from the
 #' Alaska Fisheries Science Center and is not the `AKR` species code.
 #' For example, `206` is Pacific Hake and is the default.
-#' For PacFIN data, this will be the entry in `SPID`.
diff --git a/tests/testthat/setup-getdatabase.R b/tests/testthat/setup-getdatabase.R
index f5a20b7..39401a9 100644
--- a/tests/testthat/setup-getdatabase.R
+++ b/tests/testthat/setup-getdatabase.R
@@ -1,9 +1,8 @@
 # Get the data bases from a saved file
 test_dir_data <- file.path(hakedata_wd(), "extractedData")
 
-load(file.path(test_dir_data, "pcatchatsea.Rdat"))
-load(file.path(test_dir_data, "pacfin_catch.Rdat"))
+load(file.path(test_dir_data, "pcatch.Rdat"))
 load(file.path(test_dir_data, "NORPACdomesticCatch.Rdat"))
 load(file.path(test_dir_data, "atsea.ages.Rdat"))
-load(file.path(test_dir_data, "atsea.ageWt.Rdat"))
+load(file.path(test_dir_data, "nweight.Rdat"))
 load(file.path(test_dir_data, "page.Rdat"))
diff --git a/tests/testthat/teardown-getdatabase.R b/tests/testthat/teardown-getdatabase.R
index b96dc2c..fac9c35 100644
--- a/tests/testthat/teardown-getdatabase.R
+++ b/tests/testthat/teardown-getdatabase.R
@@ -1,8 +1,7 @@
 # tear down getdatabase
 rm(atsea.ages)
-rm(atsea.ageWt)
+rm(nweight)
 rm(page)
 rm(ncatch)
 rm(pcatch)
-rm(pcatchatsea)
 rm(test_dir_data)
diff --git a/tests/testthat/test-catchesequal.R b/tests/testthat/test-catchesequal.R
index 6398465..d9c0eba 100644
--- a/tests/testthat/test-catchesequal.R
+++ b/tests/testthat/test-catchesequal.R
@@ -1,23 +1,3 @@
-test_that("Catches are within 4 mt", {
-  ncatch_month <- read.csv(
-    file.path(
-      dirname(test_dir_data),
-      "Catches", "us-cp-catch-by-month.csv"
-    )
-  )
-  diffs <- merge(
-    aggregate(MT ~ YEAR + MONTH, data = pcatchatsea, sum, na.rm = TRUE),
-    ncatch_month,
-    by.x = c("YEAR", "MONTH"), by.y = c("year", "month"), all = TRUE
-  )
-  diffs <- diffs[order(diffs$YEAR, diffs$MONTH), ]
-  diffs$diff <- diffs$MT - diffs$catch
-  expect_true("diff" %in% colnames(diffs))
-  # expect_lt(max(abs(diffs$diff), na.rm = TRUE), 4.0,
-  #   label = "Maximum abs(difference) in catches")
-  # todo: make this test better
-})
-
 test_that("Catches are good", {
   savedir <- hakedata_wd()
   quotas <- read.csv(file.path(savedir, "Catches", "quotas.csv"),