From feb3015dfa1bd1c8011e2bae2932129f2818a658 Mon Sep 17 00:00:00 2001 From: Will Pearse Date: Wed, 3 Jul 2019 17:04:05 -0600 Subject: [PATCH] Name change --- DESCRIPTION | 23 +++++++++----- NAMESPACE | 4 +-- R/{nacdb.R => MADcomm.R} | 60 ++++++++++++++++++------------------ R/cleaning.R | 10 +++--- R/downloads.R | 7 ++--- R/utility.R | 30 +++++++++--------- man/{nacdb.Rd => MADcomm.Rd} | 14 ++++----- 7 files changed, 78 insertions(+), 70 deletions(-) rename R/{nacdb.R => MADcomm.R} (79%) rename man/{nacdb.Rd => MADcomm.Rd} (83%) diff --git a/DESCRIPTION b/DESCRIPTION index 2033ffc..4915169 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,18 @@ -Package: nacdb +Package: MADcomm Type: Package -Title: Not A Trait DataBase -Version: 1.0 -Date: 2017-05-16 +Title: Make A Database of traits +Version: 1.0-0 Author: William D. Pearse, Maintainer: William D. Pearse -Description: Downloads community presence/absence and abundance data - from published sources, and collates them into an easy-to-use - format for use within R. +Description: Automates the download and assembly of ecological assemblage + data from published sources. This package is not a database, but + rather Makes A Database of communities from published, existing + sources. Those who have contributed community data that is downloaded + should, of course, be appropriately referenced when their data are + used (tools are available within this package to aid that). Some + basic data checking, and functions to perform data checking, are + included within this package, but the user should check their data + before serious use. 
License: MIT + file LICENSE Depends: R (>= 2.10) @@ -20,6 +25,8 @@ Imports: httr (>= 1.2.1), taxize, readxl, - picante + picante, + neonUtilities, + assertthat RoxygenNote: 6.1.1 Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 61bfded..fcaaf1f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,7 +43,8 @@ export(.stevens.2011) export(.thibault.2011) export(.wearn.2016a) export(.wearn.2016b) -export(nacdb) +export(MADcomm) +importFrom(assertthat,assert_that) importFrom(gdata,drop.levels) importFrom(gdata,ls.funs) importFrom(gdata,read.xls) @@ -61,7 +62,6 @@ importFrom(stats,reshape) importFrom(stats,setNames) importFrom(suppdata,suppdata) importFrom(taxize,gnr_resolve) -importFrom(testdat,sanitize_text) importFrom(utils,data) importFrom(utils,download.file) importFrom(utils,head) diff --git a/R/nacdb.R b/R/MADcomm.R similarity index 79% rename from R/nacdb.R rename to R/MADcomm.R index 3c223e8..19eb01f 100644 --- a/R/nacdb.R +++ b/R/MADcomm.R @@ -1,6 +1,6 @@ -#' Builds a community database +#' Make A Database of community data #' -#' The key function of the nacdb package. When run with defaults, it +#' The key function of the MADcomm package. When run with defaults, it #' will download and build a database of species' traits from all the #' manuscript sources in the package. This totals XXX #' manuscripts/databases, XXX species, and XXX traits. Please note @@ -20,24 +20,24 @@ #' @param cache Folder where cached downloads are stored #' @param delay How long to wait between downloads (to save server #' overload); default is 5 seconds. -#' @return nacdb.data object. XXX +#' @return MADcomm.data object. XXX #' @author Will Pearse; Bodie; etc. #' #@examples #' # Limit the scope of these as they have to work online on servers!... 
#' #@seealso #' @export #' @importFrom gdata ls.funs -nacdb <- function(cache, datasets, delay=5){ +MADcomm <- function(cache, datasets, delay=5){ #Check datasets if(missing(datasets)){ - datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+", name="package:nacdb", all.names=TRUE)) + datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+", name="package:MADcomm", all.names=TRUE)) } else { datasets <- paste0(".", tolower(datasets)) datasets <- gsub("..", ".", datasets, fixed=TRUE) } if(!all(datasets %in% datasets)){ missing <- setdiff(datasets, ls.funs()) - stop("Error: ", paste(missing, collapse=", "), "not in nacdb") + stop("Error: ", paste(missing, collapse=", "), "not in MADcomm") } #Do data loads @@ -79,14 +79,14 @@ nacdb <- function(cache, datasets, delay=5){ site.metadata=do.call(rbind, lapply(output, function(x) x$site.metadata)), study.metadata=do.call(rbind, lapply(output, function(x) x$study.metadata)) ) - class(output) <- "nacdb" + class(output) <- "MADcomm" return(output) } -print.nacdb <- function(x, ...){ +print.MADcomm <- function(x, ...){ # Argument handling - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), "' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") # Create a simple summary matrix of species and sites in x n.species <- length(unique(species(x))) @@ -98,14 +98,14 @@ print.nacdb <- function(x, ...){ invisible(setNames(c(n.species,n.sites), c("n.species","n.sites"))) } -summary.nacdb <- function(x, ...){ - print.nacdb(x, ...) +summary.MADcomm <- function(x, ...){ + print.MADcomm(x, ...) 
} -"[.nacdb" <- function(x, sites, spp){ +"[.MADcomm" <- function(x, sites, spp){ # Argument handling - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), "' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") # Setup null output in case of no match null <- list( @@ -114,7 +114,7 @@ summary.nacdb <- function(x, ...){ site.metadata=data.frame(id=NA,year=NA,name=NA,lat=NA,long=NA,address=NA,other=NA), spp.metadata=data.frame(species=NA, taxonomy=NA, other=NA) ) - class(null) <- "nacdb" + class(null) <- "MADcomm" # Site subsetting if(!missing(sites)){ @@ -145,38 +145,38 @@ summary.nacdb <- function(x, ...){ } species <- function(x, ...){ - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), "' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") return(unique(x$spp.metadata$species)) - # Return a vector of the sites in nacdb (?) + # Return a vector of the sites in MADcomm (?) 
} sites <- function(x, ...){ - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), "' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") return(unique(x$site.metadata$id)) } citations <- function(x){ - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), "' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") - data(nacdb_citations) - datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+[a-d]?", name="package:nacdb", all.names=TRUE)) - nacdb.citations$Name <- with(nacdb.citations, paste0(".", tolower(Author), ".", Year)) + data(MADcomm_citations) + datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+[a-d]?", name="package:MADcomm", all.names=TRUE)) + MADcomm.citations$Name <- with(MADcomm.citations, paste0(".", tolower(Author), ".", Year)) - return(as.character(nacdb.citations$BibTeX.citation[match(datasets, nacdb.citations$Name)])) + return(as.character(MADcomm.citations$BibTeX.citation[match(datasets, MADcomm.citations$Name)])) } # I added this during ARGON, and while it's useful I think I need to # think a little more coherently abotu how to let users interact with # study-level meta-data if(FALSE){ - #' @method subset nacdb + #' @method subset MADcomm #' @export subset.study <- function(x, studies, ...){ - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), "' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") x$data <- x$data[x$data$study %in% studies,] x$spp.metadata <- x$spp.metadata[x$spp.metadata$study %in% studies,] diff --git a/R/cleaning.R b/R/cleaning.R index 24eed46..cc47b08 100644 --- a/R/cleaning.R +++ b/R/cleaning.R @@ -1,7 +1,7 @@ -clean.nacdb <- function(x){ +clean.MADcomm <- function(x){ # Argument handling - if(!inherits(x, "nacdb")) - stop("'", deparse(substitute(x)), 
"' must be of type 'nacdb'") + if(!inherits(x, "MADcomm")) + stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'") # Clean up any obvious weirdnesses with the site names @@ -15,12 +15,12 @@ clean.nacdb <- function(x){ # require the addition of some sort of cache, as there will be # *thousands* of species names that need adding in here #' @importFrom taxize gnr_resolve -clean.nacdb.names <- function(x, thresh, ...){ +clean.MADcomm.names <- function(x, thresh, ...){ # Argument handling if(!inherits(x, "natdb")) stop("'", deparse(substitute(x)), "' must be of type 'natdb'") - # This code doesn't work on a nacdb object, probably, but the general structure will + # This code doesn't work on a MADcomm object, probably, but the general structure will spp <- unique(c(unique(x$numeric$species), unique(x$categorical$species))) dwn.spp <- gnr_resolve(spp) dwn.spp <- dwn.spp[!duplicated(dwn.spp$user_supplied_name),] diff --git a/R/downloads.R b/R/downloads.R index be7a78b..97ca72d 100644 --- a/R/downloads.R +++ b/R/downloads.R @@ -5,7 +5,6 @@ #' @importFrom utils data head read.csv read.delim read.table #' @importFrom stats aggregate na.omit reshape #' @importFrom gdata drop.levels -#' @importFrom testdat sanitize_text #' @importFrom readxl read_xlsx read_xls read_excel #' @importFrom neonUtilities loadByProduct #' @export @@ -980,7 +979,7 @@ petermann.2016 <- function(...){ eum <- as.matrix(read.xls(suppdata("10.5061/dryad.86h2k", "PMATOS_DATA_DRYADES.xlsx"), 3)[,-1]) lookup <- read.xls(suppdata("10.5061/dryad.86h2k", "PMATOS_DATA_DRYADES.xlsx"), 4, as.is=TRUE) - lookup$Species.name <- sanitize_text(lookup$Species.name) + lookup$Species.name <- .sanitize.text(lookup$Species.name) lookup$Species.name <- sapply(strsplit(lookup$Species.name, " "), function(x) paste(x[1:2], collapse="_")) lookup <- setNames(lookup$Species.name, lookup$Code) @@ -1217,7 +1216,7 @@ petermann.2016 <- function(...){ 
download.file("https://www.datadryad.org/bitstream/handle/10255/dryad.129944/BB_all_4_SimilMatrices_Dryad.xlsx?sequence=1", tmp.file) data <- read.xls(tmp.file, sheet=2) lookup <- read.xls(suppdata("10.5061/dryad.44bm6", "BB_all_4_SimilMatrices_Dryad.xlsx"), sheet=1, skip=5, header=FALSE, as.is=TRUE)[-1:-8,] - lookup[,2] <- sanitize_text(lookup[,2]) + lookup[,2] <- .sanitize.text(lookup[,2]) lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_")) lookup <- setNames(lookup[,2], lookup[,1]) names(data)[names(data) %in% names(lookup)] <- lookup[names(data)[names(data) %in% names(lookup)]] @@ -1252,7 +1251,7 @@ if(FALSE){ download.file("https://zenodo.org/record/1198846/files/template_MosquitoDataBrant77.xlsx", tmp.file) DailyHLC <- read.xls(tmp.file, sheet=4, as.is=TRUE, skip=9) lookup <- read.xls(tmp.file, sheet=3, as.is=TRUE) - lookup[,2] <- sanitize_text(lookup[,2]) + lookup[,2] <- .sanitize.text(lookup[,2]) #lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_")) lookup <- setNames(lookup[,2], lookup[,1]) names(DailyHLC) <- gsub("_count", "", names(DailyHLC), fixed=TRUE) diff --git a/R/utility.R b/R/utility.R index ec7c00d..a591909 100644 --- a/R/utility.R +++ b/R/utility.R @@ -1,17 +1,3 @@ -#' Takes a matrix of data for a species, checks if its numeric, then -#' puts the table into a long-format dataframe -#' -#' @param x a matrix of data, generally species in the columns and -#' sites in the row -#' @param row.metadata metadata for the sites; in long format, it will -#' be stored in each row with with the site pertaining to the data -#' @param col.metadata metadata for the species; will be stored in -#' every 'n'th row, where 'n' is the number of rows in the -#' original table -#' @param total.metadata metadata for table; will include publishing -#' information -#' @importFrom reshape2 melt -#' @return data set in long format, with all metadata included .matrix.melt <- function(x, 
study.metadata=data.frame(units=NA, other=NA), site.metadata=data.frame(id=NA,year=NA,name=NA,lat=NA,long=NA,address=NA,area=NA,other=NA), species.metadata=data.frame(species=NA, taxonomy=NA, other=NA)){ @@ -239,3 +225,19 @@ prog.bar <- function(x, y){ tryCatch(if(z[1] < 1) if((length(z) %% 10)==0) cat("|") else cat("."), error=function(z) cat(".")) } } +# This is testdat::sanitize_text; taken so that this package can be uploaded to CRAN +#' @importFrom assertthat assert_that +.sanitize.text <- function(input_text) { + assert_that(is.character(input_text)) + sanitize.each.element <- function(elem) { + if (Encoding(elem) == "unknown") + enc <- "ASCII" + else + enc <- Encoding(elem) + + iconv(elem, from=enc, to="ASCII", sub="") + } + input_text <- sapply(input_text, sanitize.each.element) + names(input_text) <- NULL + input_text +} diff --git a/man/nacdb.Rd b/man/MADcomm.Rd similarity index 83% rename from man/nacdb.Rd rename to man/MADcomm.Rd index 3ea0ba3..9c616ce 100644 --- a/man/nacdb.Rd +++ b/man/MADcomm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nacdb.R -\name{nacdb} -\alias{nacdb} -\title{Builds a community database} +% Please edit documentation in R/MADcomm.R +\name{MADcomm} +\alias{MADcomm} +\title{Make A Database of community data} \usage{ -nacdb(cache, datasets, delay = 5) +MADcomm(cache, datasets, delay = 5) } \arguments{ \item{cache}{Folder where cached downloads are stored} @@ -17,10 +17,10 @@ will be downloaded and returned.} overload); default is 5 seconds.} } \value{ -nacdb.data object. XXX +MADcomm.data object. XXX } \description{ -The key function of the nacdb package. When run with defaults, it +The key function of the MADcomm package. When run with defaults, it will download and build a database of species' traits from all the manuscript sources in the package. This totals XXX manuscripts/databases, XXX species, and XXX traits. Please note