From 894117eb7b662c207b03ec1e0eaec14b209d8ea1 Mon Sep 17 00:00:00 2001 From: ecophilina Date: Fri, 15 Nov 2024 13:36:37 -0800 Subject: [PATCH] add column that indicates when otoliths were the type of age structure collected, and an option to include an additional column for alternate types of structures collected --- R/get-all-survey-samples.R | 12 ++++++++++++ inst/sql/get-all-survey-samples.sql | 15 +++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/R/get-all-survey-samples.R b/R/get-all-survey-samples.R index 6347296..c54fea0 100644 --- a/R/get-all-survey-samples.R +++ b/R/get-all-survey-samples.R @@ -43,6 +43,9 @@ #' @param return_dna_info Should DNA container ids and sample type be returned? #' This can create duplication of specimen ids for some species. Defaults to #' FALSE. +#' @param return_specimen_type Should non-otolith structure types be returned? +#' This can create duplication of specimen ids for some species. Defaults to +#' FALSE. #' @param quiet_option Default option, `"message"`, suppresses messages from #' sections of code with lots of `join_by` messages. Any other string will allow #' messages. @@ -63,6 +66,7 @@ get_all_survey_samples <- function(species, ssid = NULL, remove_bad_data = TRUE, remove_duplicates = TRUE, return_dna_info = FALSE, + return_specimen_type = FALSE, drop_na_columns = TRUE, quiet_option = "message") { .q <- read_sql("get-all-survey-samples.sql") @@ -115,6 +119,14 @@ get_all_survey_samples <- function(species, ssid = NULL, .d <- run_sql("GFBioSQL", .q) + ## ALTERNATE_SPECIMEN_TYPE can cause duplication for some species with multiple types collected per individual + ## TODO: handle record duplication from multiple samples, e.g., by combining them or by not returning them.
+ if (!return_specimen_type) { + .d <- .d |> + select(-ALTERNATE_SPECIMEN_TYPE) |> + distinct() + } + names(.d) <- tolower(names(.d)) if (nrow(.d) < 1) { diff --git a/inst/sql/get-all-survey-samples.sql b/inst/sql/get-all-survey-samples.sql index bf47daf..9d6a2bd 100644 --- a/inst/sql/get-all-survey-samples.sql +++ b/inst/sql/get-all-survey-samples.sql @@ -38,6 +38,8 @@ SELECT SM.SPECIES_CATEGORY_CODE, SM.SAMPLE_SOURCE_CODE, CASE WHEN SC.SPECIMEN_COLLECTED_IND = 'Y' OR SC.SPECIMEN_COLLECTED_IND = 'y' THEN 1 ELSE 0 END AS AGE_SPECIMEN_COLLECTED, + CASE WHEN SC1.SPECIMEN_COLLECTED_IND = 'Y' OR SC1.SPECIMEN_COLLECTED_IND = 'y' THEN 1 ELSE 0 END AS OTOLITH_COLLECTED, + CA.COLLECTED_ATTRIBUTE_DESC AS ALTERNATE_SPECIMEN_TYPE, DNA.STORAGE_CONTAINER_TYPE_CODE AS DNA_SAMPLE_TYPE, DNA.STORAGE_CONTAINER_SUB_ID AS DNA_CONTAINER_ID, CASE WHEN SM.GEAR_CODE IN (1, 6, 8, 11, 14, 16) THEN ISNULL(TRSP.USABILITY_CODE, 0) @@ -82,6 +84,19 @@ SELECT FROM GFBioSQL.dbo.SPECIMEN_COLLECTED WHERE COLLECTED_ATTRIBUTE_CODE BETWEEN 20 AND 25 GROUP BY SAMPLE_ID, SPECIMEN_ID, SPECIMEN_COLLECTED_IND) SC ON SP.SPECIMEN_ID = SC.SPECIMEN_ID AND SP.SAMPLE_ID = SC.SAMPLE_ID + LEFT JOIN (SELECT SAMPLE_ID, + MIN(SPECIMEN_ID) AS SPECIMEN_ID, + SPECIMEN_COLLECTED_IND AS SPECIMEN_COLLECTED_IND + FROM GFBioSQL.dbo.SPECIMEN_COLLECTED + WHERE COLLECTED_ATTRIBUTE_CODE = 20 + GROUP BY SAMPLE_ID, SPECIMEN_ID, SPECIMEN_COLLECTED_IND) SC1 ON SP.SPECIMEN_ID = SC1.SPECIMEN_ID AND SP.SAMPLE_ID = SC1.SAMPLE_ID + LEFT JOIN (SELECT SAMPLE_ID, + MIN(SPECIMEN_ID) AS SPECIMEN_ID, + COLLECTED_ATTRIBUTE_CODE + FROM GFBioSQL.dbo.SPECIMEN_COLLECTED + WHERE COLLECTED_ATTRIBUTE_CODE BETWEEN 21 AND 29 AND (SPECIMEN_COLLECTED_IND = 'Y' OR SPECIMEN_COLLECTED_IND = 'y') + GROUP BY SAMPLE_ID, SPECIMEN_ID, COLLECTED_ATTRIBUTE_CODE) SC2 ON SP.SPECIMEN_ID = SC2.SPECIMEN_ID AND SP.SAMPLE_ID = SC2.SAMPLE_ID + LEFT JOIN COLLECTED_ATTRIBUTE CA ON CA.COLLECTED_ATTRIBUTE_CODE = SC2.COLLECTED_ATTRIBUTE_CODE WHERE TRIP_SUB_TYPE_CODE IN (2, 3) 
AND ISNULL(G.SURVEY_SERIES_ID, S.SURVEY_SERIES_ID) <> 0 -- insert species here