Skip to content

Commit

Permalink
Add missing 2019 data [minor] (#124)
Browse files Browse the repository at this point in the history
* Clean mercury data format

* Clean colonies

* Replan count cleaning script

* Update colony shapefiles

* Add 2019 maxcount data
  • Loading branch information
gmyenni authored Oct 30, 2024
1 parent 983fc87 commit ab5a2f6
Show file tree
Hide file tree
Showing 15 changed files with 15,951 additions and 15,223 deletions.
7,673 changes: 3,954 additions & 3,719 deletions Counts/maxcounts.csv

Large diffs are not rendered by default.

20,211 changes: 10,310 additions & 9,901 deletions Counts/maxcounts_under40.csv

Large diffs are not rendered by default.

176 changes: 76 additions & 100 deletions DataCleaningScripts/clean_counts.R
Original file line number Diff line number Diff line change
@@ -1,64 +1,95 @@
#' Functions used to reshape and clean count data from field format
#'
# Reshapes and cleans max count data (2022->)
# Reads count data from original excel files, reshapes from wide (dates as cols) to long format,
# corrects data format errors, appends to long timeseries file

`%>%` <- magrittr::`%>%`

#' Reshapes and cleans max count data (2022->)
#'
#'
#'
#' Reads count data from original excel files, reshapes from wide (dates as cols) to long format,
#' corrects data format errors, appends to long timeseries file
#'
#' data_path <- "../Dropbox (UFL)/Everglades/2023 Data/2023 Final Report Work/SFWMD Report_Table_2023.xlsx"
#' year <- 2023
clean_count_data <- function(data_path, year) {
############################# Get raw data ####################################################
year <- 2019
data_path <- "~/Dropbox (UFL)/Everglades/Reports/2019 Reports/SFWMD report table 2019.xlsx"
data_path <- "~/Desktop/maxcount_2019.xlsx"
SFWMD_report_table_2019 <- readxl::read_excel(data_path, sheet = "Appendix", skip = 1)

############################ Build data tables #######################################

colonies <- read.csv("SiteandMethods/colonies.csv") %>%
dplyr::mutate(group_id = as.numeric(group_id),
latitude = as.numeric(latitude),
longitude = as.numeric(longitude))

colonies <- read.csv("SiteandMethods/colonies.csv")
species <- read.csv("SiteandMethods/species_list.csv")
species <- read.csv("SiteandMethods/species_list.csv")
counts <- read.csv("Counts/maxcounts.csv")
under40 <- read.csv("Counts/maxcounts_under40.csv")

tab_names <- readxl::excel_sheets(path = data_path)

data_raw <- readxl::read_excel(path = data_path, sheet = "Appendix",
col_names = TRUE, col_types = "text", skip=1) %>%
dplyr::rename_with(tolower) %>%
dplyr::rename(smda="unid. small dark.") %>%
dplyr::rename(smwh="unid. small white") %>%
dplyr::rename(smwh="unid. small wht.") %>%
dplyr::rename(lawh="unid. large wht.") %>%
dplyr::mutate(colony_old = colony,
colony = tolower(colony),
colony = gsub(" ", "_", colony),
colony = gsub("/", "_", colony),
colony = replace(colony, colony=="63_no_name", "63"),
colony = replace(colony, colony=="71_canal_junction", "canal_junction"),
colony = replace(colony, colony %in% c("63_no_name","63_006"), "63"),
colony = replace(colony, colony %in% c("71_canal_junction","71"), "canal_junction"),
colony = replace(colony, colony=="78_canal_north", "canal_north"),
colony = replace(colony, colony %in% c("3b_ramp_80","3b_ramp"), "3b_boat_ramp"),
colony = replace(colony, colony=="89_venus", "venus"),
colony = replace(colony, colony=="austere", "auster"),
colony = replace(colony, colony=="cooklox11", "lox111"),
colony = replace(colony, colony %in% c("cooklox11", "cooknc3"), "lox111"),
colony = replace(colony, colony=="cooknc4", "lox_nc4"),
colony = replace(colony, colony=="enlil_epona", "enlil"),
colony = replace(colony, colony=="jetport_new_64", "jetport_new"),
colony = replace(colony, colony=="loxwest", "lox_west"),
colony = replace(colony, colony=="cooknc1(77_78)", "lox_nc1"),
colony = replace(colony, colony=="cooknc2(76)", "vesta"),
colony = replace(colony, colony=="loxramp_011", "lox_ramp"),
colony = replace(colony, colony %in% c("tyr_lox73","tyr"), "lox73"),
colony = replace(colony, colony %in% c("tyr_lox73","tyr", "lox73_tyr"), "lox73"),
colony = replace(colony, colony=="vulture_007", "vulture"),
colony = replace(colony, colony=="1219_draco", "draco"),
colony = replace(colony, colony=="990_frodo", "frodo"),
colony = replace(colony, colony=="38", "38_185"),
colony = replace(colony, colony=="51", "juno"),
colony = replace(colony, colony %in% c("51", "51_juno"), "juno"),
colony = replace(colony, colony=="lox11", "outer_lox111_south"),
colony = replace(colony, colony=="little_d_little_a", "little_a"),
colony = replace(colony, colony=="112", "3665"),
colony = replace(colony, colony=="1362", "487"),
colony = replace(colony, colony=="1470", "1888"),
colony = replace(colony, colony=="1379", "1824"),
colony = replace(colony, colony=="14", "1351")) %>%
colony = replace(colony, colony %in% c("1362", "739", "487"), "col487"),
colony = replace(colony, colony %in% c("1470","576", "98"), "1888"),
colony = replace(colony, colony %in% c("1379", "766_57_nc_2018"), "1824"),
colony = replace(colony, colony=="14", "1351"),
colony = replace(colony, colony=="75", "3700"),
colony = replace(colony, colony=="610_67_nc_2018", "67"),
colony = replace(colony, colony=="644", "1573"),
colony = replace(colony, colony=="2019_greg_colony_1", "colony13"),
colony = replace(colony, colony %in% c("rodgers_river_bay_large_island","rodgers_river_bay_small_island"), "rodgers_river_bay"),
colony = replace(colony, colony=="grossman_ridge_willowhead", "grossman_willowhead")) %>%
dplyr::left_join(colonies[,1:2], by = dplyr::join_by(colony))

new_colonies <- data_raw[-which(data_raw$colony %in% colonies$colony),]

# only colonies < 40 should be left in new_colonies


######################## Add new colony info to colonies table ################################

### New colony
# colonies[dim(colonies)[1]+1,]=c(max(colonies$group_id,na.rm=T) + 1,
# "colony","region","subregion",latitude,longitude,"aka","Display Name")

### Update colony
# colonies[colonies$colony=="colony13",]$aka="Colony 13, 2019 GREG colony 1"

colonies <- colonies %>%
dplyr::mutate(group_id = as.numeric(group_id),
latitude = as.numeric(latitude),
longitude = as.numeric(longitude)) %>%
dplyr::arrange(group_id)
write.table(colonies, "SiteandMethods/colonies.csv", row.names = FALSE, col.names = TRUE,
na = "", sep = ",", quote = c(7,8))

############################### Shape max count new data ###################################

new_data <- data_raw %>%
dplyr::select(-c(wca,total)) %>%
dplyr::filter(colony %in% colonies$colony) %>%
Expand All @@ -83,86 +114,31 @@ clean_count_data <- function(data_path, year) {
print(unique(new_data$colony[which(!(new_data$colony %in% colonies$colony))]))
print(unique(new_data$species[which(!(new_data$species %in% species$species))]))
}

############################## Shape under 40 new data ##############################################
### Only colonies < 40 should be left in new_colonies

under_40 <- new_colonies %>%
under_40_new <- new_colonies %>%
dplyr::filter(!is.na(latitude)) %>%
dplyr::select(-"smda") %>%
dplyr::rowwise() %>%
dplyr::mutate(notes = dplyr::case_when(
any(dplyr::c_across(greg:anhi)=="***", na.rm = T) ~ "1s indicate presence",
TRUE ~ ""))
under_40[under_40=="***"] <- "1"
under_40 <- under_40 %>%
dplyr::mutate_at(3:20,as.numeric) %>%
dplyr::mutate(year=as.numeric(year), dcco = NA, grhe=NA, smhe=NA, lawh=NA, lada=NA) %>%
dplyr::select("group_id","year","colony","colony_old","latitude","longitude","wca","greg","whib","wost","gbhe","rosp","sneg","anhi","trhe","bcnh","lbhe","ycnh","glib","caeg","dcco","grhe","smhe","lawh","lada","smda","smwh","notes","total")
under_40_new[under_40_new=="***"] <- "1"
under_40_new <- under_40_new %>%
dplyr::mutate(year=as.numeric(year), dcco = NA, smhe=NA, lada=NA) %>%
dplyr::select("group_id","year","colony","colony_old","latitude","longitude","wca","greg","whib","wost","gbhe","rosp","sneg","anhi","trhe","bcnh","lbhe","ycnh","glib","caeg","dcco","grhe","smhe","lawh","lada","smwh","total","notes") %>%
dplyr::mutate_at(c("group_id","year","latitude","longitude","greg","whib","wost","gbhe","rosp","sneg","anhi","trhe","bcnh","lbhe","ycnh","glib","caeg","dcco","grhe","smhe","lawh","lada","smwh","total"),as.numeric)

return(list(new_data=new_data, new_colonies=new_colonies, under_40=under_40))
}

#' Functions customized to old data (-2021)
#'

#' Reshapes and cleans max count data (1994-2021)
#'
#'
#'
#' Reads count data from original excel files, reshapes from wide (dates as cols) to long format,
#' corrects data format errors, appends to long timeseries file

clean_count_data_old <- function(data_path, year) {
################## Move under 40 data to main table for new colonies ###################################
new_colony_list <- c()

colonies <- read.csv("SiteandMethods/colonies.csv")
species <- read.csv("SiteandMethods/species_list.csv")
############################## Save data ############################################################
counts <- counts %>% dplyr::bind_rows(new_data) %>% dplyr::arrange(year,group_id)
write.table(counts, "Counts/maxcounts.csv", row.names = FALSE, na = "", sep = ",", quote = 9)

tab_names <- readxl::excel_sheets(path = data_path)
tab_names <- tab_names[tab_names != "key"]
tab_names <- tab_names[!startsWith(tab_names ,"Other")]
tab_names <- tab_names[!startsWith(tab_names ,"Overview")]
tab_names <- tab_names[!startsWith(tab_names ,"Dataset Headers")]
data_raw <- as.data.frame(lapply(tab_names[1], function(x) readxl::read_excel(path = data_path, sheet = x,
col_names = TRUE, col_types = "text")))

new_data <- data_raw %>%
dplyr::rename_with(~ tolower(gsub(".", "_", .x, fixed = TRUE))) %>%
dplyr::rename(type = type_of_count) %>%
tidyr::pivot_longer(cols = !1:6,
names_to = "species",
values_to = "count") %>%

dplyr::mutate(year = year,
date = as.Date(as.integer(date), origin="1899-12-30"),
notes = "",
colony = tolower(colony),
colony = gsub(" ", "_", colony),
colony = gsub("/.", "_", colony),
colony = replace(colony, colony=="6th_bridge_whib", "6th_bridge"),
colony = replace(colony, colony=="011_ox_ramp", "lox_ramp_011"),
colony = replace(colony, colony=="lox73", "lox_73_tyr"),
type = tolower(type),
type = replace(type, type == "ground count", "ground"),
notes = replace(notes, type=="est. uav", "estimated"),
type = replace(type, type=="est. uav", "uav"),
notes = replace(notes, count=="***", "presence"),
count = replace(count, count=="***", 1),
species = replace(species, species %in% c("ani"), "anhi"),
species = replace(species, species %in% c("unkn_smwh"), "smwt")) %>%

dplyr::filter(!is.na(count)) %>%

dplyr::mutate(date = as.Date(date),
year = as.numeric(year),
latitude = as.numeric(latitude),
longitude = as.numeric(longitude),
count = as.numeric(count)) %>%
dplyr::select(year, date, colony, wca, latitude, longitude, type, behavior, species, count, notes)

if(!all(new_data$colony %in% colonies$colony)|
!all(new_data$species %in% species$species)|
!all(format(as.Date(new_data$date),"%Y")==year)) {
print(unique(new_data$colony[which(!(new_data$colony %in% colonies$colony))]))
print(unique(new_data$species[which(!(new_data$species %in% species$species))]))
}

return(new_data)
}

under40 <- under40 %>% dplyr::bind_rows(under_40_new) %>% dplyr::arrange(year)
write.table(under40, "Counts/maxcounts_under40.csv", row.names = FALSE, col.names = TRUE,
na = "", sep = ",", quote = 28)

19 changes: 0 additions & 19 deletions DataCleaningScripts/get_counts.R

This file was deleted.

43 changes: 43 additions & 0 deletions DataCleaningScripts/get_hg.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Used to clean and validate mercury data from field format
#'
#' Normalizes the colony names in `new_hg`, adds `colony_year` and
#' `unique_feather_id` columns, prints any colony names that are not in
#' SiteandMethods/colonies.csv, and rewrites Hg/feather_hg.csv.
#'
#' NOTE(review): `new_hg` is never created in this script; it has to exist in
#' the session already (presumably the raw field data, loaded by hand) before
#' the cleaning pipeline below can run.

`%>%` <- magrittr::`%>%`

# Reference table of known colonies and the existing mercury table; both paths
# are relative to the working directory.
colonies <- read.csv("SiteandMethods/colonies.csv")
feather_hg <- read.csv("Hg/feather_hg.csv")



# Lower-case the column names, normalize colony names (lower case, spaces and
# slashes to underscores), then map field spellings to the colony names the
# script expects to find in `colonies$colony`.
new_hg <- new_hg %>%
dplyr::rename_with(tolower) %>%
dplyr::mutate(colony = tolower(colony),
colony = gsub(" ", "_", colony),
colony = gsub("/", "_", colony),
# "/." is a regex: a slash followed by any single character. Every "/" was
# already replaced on the previous line, so this has nothing left to match.
colony = gsub("/.", "_", colony),
colony = replace(colony, colony=="alleynorth", "alley_north"),
colony = replace(colony, colony=="l67", "horus"),
colony = replace(colony, colony=="mudcanal", "mud_canal_south"),
colony = replace(colony, colony=="tameast", "tamiami_east"),
colony = replace(colony, colony=="3bmud" , "heron_alley"),
colony = replace(colony, colony=="falsel67", "false_l67"),
colony = replace(colony, colony=="tamwest", "tamiami_west"),
colony = replace(colony, colony=="3bmud_east", "mud_east"),
colony = replace(colony, colony=="cypresscity", "cypress_city"),
colony = replace(colony, colony=="6bridge", "6th_bridge"),
colony = replace(colony, colony=="rook.br.", "rookery_branch"),
colony = replace(colony, colony=="andytown", "andytown_north"),
colony = replace(colony, colony=="cuthbert", "cuthbert_lake"),
colony = replace(colony, colony=="paurotis", "paurotis_pond")) %>%
# paste() separates its arguments with a space by default, so these ids come
# out as "<colony> _ <year>" and "<colony_year> _ <nest_feather_id>".
# NOTE(review): confirm the spaces are wanted; paste0() would omit them.
dplyr::mutate(colony_year = paste(colony, "_", year),
unique_feather_id = paste(colony_year, "_", nest_feather_id))

# Validation: print every cleaned colony name missing from the colonies table.
# Nothing is dropped or stopped here; the names are only reported.
if(!all(new_hg$colony %in% colonies$colony)) {
print(unique(new_hg$colony[which(!(new_hg$colony %in% colonies$colony))]))
}


# NOTE(review): this overwrites the cleaned `new_hg` with a column-reordered
# copy of `feather_hg`, and the result is not used below. Because everything()
# already covers `notes`, the trailing `notes` does not move that column.
# Presumably the intent was to append `new_hg` to `feather_hg` here - confirm.
new_hg <- feather_hg %>% dplyr::select(year,colony,nest,feather_id,hg,culmen,tarsus,mass,date,everything(),notes)


# Writes `feather_hg` as it was read in (not `new_hg`); quote = 24 quotes only
# the 24th column. NOTE(review): presumably that column is `notes` - confirm.
write.table(feather_hg, "Hg/feather_hg.csv", row.names = FALSE, col.names = TRUE,
na = "", sep = ",", quote = 24)
67 changes: 67 additions & 0 deletions DataCleaningScripts/old_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,70 @@ write.csv(species, "SiteandMethods/species_list.csv", row.names = FALSE, na = ""

colonies <- colonies %>% dplyr::arrange(colony)
write.csv(colonies, "SiteandMethods/colonies.csv", row.names = FALSE, na = "", quote = c(7,8))

#' Functions customized to old data (-2021)
#'

#' Reshape and clean max count data (1994-2021)
#'
#' Reads the first data sheet of an original Excel count workbook, reshapes it
#' from wide (one column per species) to long format (one row per species
#' count), corrects known formatting errors, and checks colonies, species and
#' dates against the reference tables.
#'
#' Reads `SiteandMethods/colonies.csv` and `SiteandMethods/species_list.csv`
#' relative to the working directory. Values that fail a check are printed;
#' the corresponding rows are still returned.
#'
#' @param data_path Path to the Excel workbook holding the raw counts.
#' @param year Survey year; fills the `year` column and is compared with the
#'   year of every `date`.
#' @return A data frame with columns `year`, `date`, `colony`, `wca`,
#'   `latitude`, `longitude`, `type`, `behavior`, `species`, `count`, `notes`.

clean_count_data_old <- function(data_path, year) {

  colonies <- read.csv("SiteandMethods/colonies.csv")
  species <- read.csv("SiteandMethods/species_list.csv")

  # Drop the non-data sheets; only the first remaining sheet is read.
  tab_names <- readxl::excel_sheets(path = data_path)
  tab_names <- tab_names[tab_names != "key"]
  tab_names <- tab_names[!startsWith(tab_names, "Other")]
  tab_names <- tab_names[!startsWith(tab_names, "Overview")]
  tab_names <- tab_names[!startsWith(tab_names, "Dataset Headers")]
  if (length(tab_names) == 0) {
    stop("No data sheet found in ", data_path, call. = FALSE)
  }

  # Every cell is read as text. The list-then-as.data.frame() form is kept on
  # purpose: presumably it is what turns spaces in the sheet's column names
  # into dots (check.names), which rename_with() below maps to underscores.
  data_raw <- as.data.frame(lapply(tab_names[1], function(x) {
    readxl::read_excel(path = data_path, sheet = x,
                       col_names = TRUE, col_types = "text")
  }))

  new_data <- data_raw %>%
    dplyr::rename_with(~ tolower(gsub(".", "_", .x, fixed = TRUE))) %>%
    dplyr::rename(type = type_of_count) %>%
    # The first six columns describe the observation; every other column
    # becomes one (species, count) row.
    tidyr::pivot_longer(cols = !1:6,
                        names_to = "species",
                        values_to = "count") %>%
    dplyr::mutate(
      year = year,
      # Dates arrive as Excel serial day numbers stored as text.
      date = as.Date(as.integer(date), origin = "1899-12-30"),
      notes = "",
      colony = tolower(colony),
      colony = gsub(" ", "_", colony),
      # NOTE(review): "/." is a regex (slash plus the next character), so the
      # character after a slash is dropped too. Left unchanged because the
      # colony mappings below were written against this output - confirm.
      colony = gsub("/.", "_", colony),
      colony = replace(colony, colony == "6th_bridge_whib", "6th_bridge"),
      colony = replace(colony, colony == "011_ox_ramp", "lox_ramp_011"),
      colony = replace(colony, colony == "lox73", "lox_73_tyr"),
      type = tolower(type),
      type = replace(type, type == "ground count", "ground"),
      # Notes must be set before the matching type/count value is rewritten.
      notes = replace(notes, type == "est. uav", "estimated"),
      type = replace(type, type == "est. uav", "uav"),
      # "***" marks presence without a count: stored as 1 plus a note.
      notes = replace(notes, count == "***", "presence"),
      count = replace(count, count == "***", 1),
      species = replace(species, species %in% c("ani"), "anhi"),
      species = replace(species, species %in% c("unkn_smwh"), "smwt")
    ) %>%
    dplyr::filter(!is.na(count)) %>%
    dplyr::mutate(year = as.numeric(year),
                  latitude = as.numeric(latitude),
                  longitude = as.numeric(longitude),
                  count = as.numeric(count)) %>%
    dplyr::select(year, date, colony, wca, latitude, longitude, type,
                  behavior, species, count, notes)

  # Validation. Each check yields the offending values, so the condition below
  # is always a single TRUE/FALSE. A missing date counts as a failure instead
  # of turning the condition into NA, which would make `if` abort.
  bad_colonies <- unique(
    new_data$colony[!(new_data$colony %in% colonies$colony)]
  )
  bad_species <- unique(
    new_data$species[!(new_data$species %in% species$species)]
  )
  date_years <- format(new_data$date, "%Y")
  bad_dates <- unique(
    new_data$date[is.na(date_years) | date_years != year]
  )

  if (length(bad_colonies) > 0 ||
      length(bad_species) > 0 ||
      length(bad_dates) > 0) {
    print(bad_colonies)
    print(bad_species)
    print(bad_dates)
  }

  new_data
}
Loading

0 comments on commit ab5a2f6

Please sign in to comment.