Name change

pearselab · Jul 3, 2019 · feb3015 · feb3015
1 parent d61a54d
commit feb3015
Show file tree

Hide file tree

Showing 7 changed files with 78 additions and 70 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,13 +1,18 @@
-Package: nacdb
+Package: MADcomm
 Type: Package
-Title: Not A Trait DataBase
-Version: 1.0
-Date: 2017-05-16
+Title: Make A Database of traits
+Version: 1.0-0
 Author: William D. Pearse, 
 Maintainer: William D. Pearse <[email protected]>
-Description: Downloads community presence/absence and abundance data
-    from published sources, and collates them into an easy-to-use
-    format for use within R.
+Description: Automates the download and assembly of ecological assemblage
+    data from published sources. This package is not a database, but
+    rather Makes A Database of communities from published, existing
+    sources. Those who have contributed community data that is downloaded
+    should, of course, be appropriately referenced when their data are
+    used (tools are available within this package to aid that). Some
+    basic data checking, and functions to perform data checking, are
+    included within this package, but the user should check their data
+    before serious use.
 License: MIT + file LICENSE
 Depends:
     R (>= 2.10)
@@ -20,6 +25,8 @@ Imports:
     httr (>= 1.2.1),
     taxize,
     readxl,
-    picante
+    picante,
+    neonUtilities,
+    assertthat
 RoxygenNote: 6.1.1
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
@@ -43,7 +43,8 @@ export(.stevens.2011)
 export(.thibault.2011)
 export(.wearn.2016a)
 export(.wearn.2016b)
-export(nacdb)
+export(MADcomm)
+importFrom(assertthat,assert_that)
 importFrom(gdata,drop.levels)
 importFrom(gdata,ls.funs)
 importFrom(gdata,read.xls)
@@ -61,7 +62,6 @@ importFrom(stats,reshape)
 importFrom(stats,setNames)
 importFrom(suppdata,suppdata)
 importFrom(taxize,gnr_resolve)
-importFrom(testdat,sanitize_text)
 importFrom(utils,data)
 importFrom(utils,download.file)
 importFrom(utils,head)

diff --git a/R/nacdb.R → R/MADcomm.R b/R/nacdb.R → R/MADcomm.R
@@ -1,6 +1,6 @@
-#' Builds a community database
+#' Make A Database of community data
 #'
-#' The key function of the nacdb package. When run with defaults, it
+#' The key function of the MADcomm package. When run with defaults, it
 #' will download and build a database of species' traits from all the
 #' manuscript sources in the package. This totals XXX
 #' manuscripts/databases, XXX species, and XXX traits. Please note
@@ -20,24 +20,24 @@
 #' @param cache Folder where cached downloads are stored
 #' @param delay How long to wait between downloads (to save server
 #'     overload); default is 5 seconds.
-#' @return nacdb.data object. XXX
+#' @return MADcomm.data object. XXX
 #' @author Will Pearse; Bodie; etc.
 #' #@examples
 #' # Limit the scope of these as they have to work online on servers!...
 #' #@seealso 
 #' @export
 #' @importFrom gdata ls.funs
-nacdb <- function(cache, datasets, delay=5){
+MADcomm <- function(cache, datasets, delay=5){
     #Check datasets
     if(missing(datasets)){
-        datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+", name="package:nacdb", all.names=TRUE))
+        datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+", name="package:MADcomm", all.names=TRUE))
     } else {
         datasets <- paste0(".", tolower(datasets))
         datasets <- gsub("..", ".", datasets, fixed=TRUE)
     }
     if(!all(datasets %in% datasets)){
         missing <- setdiff(datasets, ls.funs())
-        stop("Error: ", paste(missing, collapse=", "), "not in nacdb")
+        stop("Error: ", paste(missing, collapse=", "), "not in MADcomm")
     }
 
     #Do data loads
@@ -79,14 +79,14 @@ nacdb <- function(cache, datasets, delay=5){
         site.metadata=do.call(rbind, lapply(output, function(x) x$site.metadata)),
         study.metadata=do.call(rbind, lapply(output, function(x) x$study.metadata))
     )
-    class(output) <- "nacdb"
+    class(output) <- "MADcomm"
     return(output)
 }
 
-print.nacdb <- function(x, ...){
+print.MADcomm <- function(x, ...){
     # Argument handling
-    if(!inherits(x, "nacdb"))
-        stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+    if(!inherits(x, "MADcomm"))
+        stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
 
     # Create a simple summary matrix of species and sites in x
     n.species <- length(unique(species(x)))
@@ -98,14 +98,14 @@ print.nacdb <- function(x, ...){
     invisible(setNames(c(n.species,n.sites), c("n.species","n.sites")))
 }
 
-summary.nacdb <- function(x, ...){
-    print.nacdb(x, ...)
+summary.MADcomm <- function(x, ...){
+    print.MADcomm(x, ...)
 }
 
-"[.nacdb" <- function(x, sites, spp){
+"[.MADcomm" <- function(x, sites, spp){
     # Argument handling
-    if(!inherits(x, "nacdb"))
-        stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+    if(!inherits(x, "MADcomm"))
+        stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
 
     # Setup null output in case of no match
     null <- list(
@@ -114,7 +114,7 @@ summary.nacdb <- function(x, ...){
         site.metadata=data.frame(id=NA,year=NA,name=NA,lat=NA,long=NA,address=NA,other=NA),
         spp.metadata=data.frame(species=NA, taxonomy=NA, other=NA)
     )
-    class(null) <- "nacdb"
+    class(null) <- "MADcomm"
 
     # Site subsetting
     if(!missing(sites)){
@@ -145,38 +145,38 @@ summary.nacdb <- function(x, ...){
 }
 
 species <- function(x, ...){
-    if(!inherits(x, "nacdb"))
-        stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+    if(!inherits(x, "MADcomm"))
+        stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
     return(unique(x$spp.metadata$species))
-    # Return a vector of the sites in nacdb (?)
+    # Return a vector of the sites in MADcomm (?)
 }
 
 sites <- function(x, ...){
-    if(!inherits(x, "nacdb"))
-        stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+    if(!inherits(x, "MADcomm"))
+        stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
     return(unique(x$site.metadata$id))
 }
 
 citations <- function(x){
-    if(!inherits(x, "nacdb"))
-        stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+    if(!inherits(x, "MADcomm"))
+        stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
 
-    data(nacdb_citations)
-    datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+[a-d]?", name="package:nacdb", all.names=TRUE))
-    nacdb.citations$Name <- with(nacdb.citations, paste0(".", tolower(Author), ".", Year))
+    data(MADcomm_citations)
+    datasets <- Filter(Negate(is.function), ls(pattern="^\\.[a-z]*\\.[0-9]+[a-d]?", name="package:MADcomm", all.names=TRUE))
+    MADcomm.citations$Name <- with(MADcomm.citations, paste0(".", tolower(Author), ".", Year))
 
-    return(as.character(nacdb.citations$BibTeX.citation[match(datasets, nacdb.citations$Name)]))
+    return(as.character(MADcomm.citations$BibTeX.citation[match(datasets, MADcomm.citations$Name)]))
 }
 
 # I added this during ARGON, and while it's useful I think I need to
 # think a little more coherently abotu how to let users interact with
 # study-level meta-data
 if(FALSE){
-    #' @method subset nacdb
+    #' @method subset MADcomm
     #' @export
     subset.study <- function(x, studies, ...){
-        if(!inherits(x, "nacdb"))
-            stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+        if(!inherits(x, "MADcomm"))
+            stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
 
         x$data <- x$data[x$data$study %in% studies,]
         x$spp.metadata <- x$spp.metadata[x$spp.metadata$study %in% studies,]

diff --git a/R/cleaning.R b/R/cleaning.R
@@ -1,7 +1,7 @@
-clean.nacdb <- function(x){
+clean.MADcomm <- function(x){
     # Argument handling
-    if(!inherits(x, "nacdb"))
-        stop("'", deparse(substitute(x)), "' must be of type 'nacdb'")
+    if(!inherits(x, "MADcomm"))
+        stop("'", deparse(substitute(x)), "' must be of type 'MADcomm'")
 
     # Clean up any obvious weirdnesses with the site names
 
@@ -15,12 +15,12 @@ clean.nacdb <- function(x){
 # require the addition of some sort of cache, as there will be
 # *thousands* of species names that need adding in here
 #' @importFrom taxize gnr_resolve
-clean.nacdb.names <- function(x, thresh, ...){
+clean.MADcomm.names <- function(x, thresh, ...){
     # Argument handling
     if(!inherits(x, "natdb"))
         stop("'", deparse(substitute(x)), "' must be of type 'natdb'")
 
-    # This code doesn't work on a nacdb object, probably, but the general structure will
+    # This code doesn't work on a MADcomm object, probably, but the general structure will
     spp <- unique(c(unique(x$numeric$species), unique(x$categorical$species)))
     dwn.spp <- gnr_resolve(spp)
     dwn.spp <- dwn.spp[!duplicated(dwn.spp$user_supplied_name),]

diff --git a/R/downloads.R b/R/downloads.R
@@ -5,7 +5,6 @@
 #' @importFrom utils data head read.csv read.delim read.table
 #' @importFrom stats aggregate na.omit reshape
 #' @importFrom gdata drop.levels
-#' @importFrom testdat sanitize_text
 #' @importFrom readxl read_xlsx read_xls read_excel
 #' @importFrom neonUtilities loadByProduct
 #' @export
@@ -980,7 +979,7 @@ petermann.2016 <- function(...){
     eum <- as.matrix(read.xls(suppdata("10.5061/dryad.86h2k", "PMATOS_DATA_DRYADES.xlsx"), 3)[,-1])
 
     lookup <- read.xls(suppdata("10.5061/dryad.86h2k", "PMATOS_DATA_DRYADES.xlsx"), 4, as.is=TRUE)
-    lookup$Species.name <- sanitize_text(lookup$Species.name)
+    lookup$Species.name <- .sanitize.text(lookup$Species.name)
     lookup$Species.name <- sapply(strsplit(lookup$Species.name, " "), function(x) paste(x[1:2], collapse="_"))
     lookup <- setNames(lookup$Species.name, lookup$Code)
 
@@ -1217,7 +1216,7 @@ petermann.2016 <- function(...){
     download.file("https://www.datadryad.org/bitstream/handle/10255/dryad.129944/BB_all_4_SimilMatrices_Dryad.xlsx?sequence=1", tmp.file)
     data <- read.xls(tmp.file, sheet=2)
     lookup <- read.xls(suppdata("10.5061/dryad.44bm6", "BB_all_4_SimilMatrices_Dryad.xlsx"), sheet=1, skip=5, header=FALSE, as.is=TRUE)[-1:-8,]
-    lookup[,2] <- sanitize_text(lookup[,2])  
+    lookup[,2] <- .sanitize.text(lookup[,2])  
     lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_"))
     lookup <- setNames(lookup[,2], lookup[,1])
     names(data)[names(data) %in% names(lookup)] <- lookup[names(data)[names(data) %in% names(lookup)]]
@@ -1252,7 +1251,7 @@ if(FALSE){
     download.file("https://zenodo.org/record/1198846/files/template_MosquitoDataBrant77.xlsx", tmp.file)
     DailyHLC <- read.xls(tmp.file, sheet=4, as.is=TRUE, skip=9)
     lookup <- read.xls(tmp.file, sheet=3, as.is=TRUE)
-    lookup[,2] <- sanitize_text(lookup[,2])  
+    lookup[,2] <- .sanitize.text(lookup[,2])  
     #lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_"))
     lookup <- setNames(lookup[,2], lookup[,1])
     names(DailyHLC) <- gsub("_count", "", names(DailyHLC), fixed=TRUE)

diff --git a/R/utility.R b/R/utility.R
@@ -1,17 +1,3 @@
-#' Takes a matrix of data for a species, checks if its numeric, then
-#' puts the table into a long-format dataframe
-#'
-#' @param x a matrix of data, generally species in the columns and
-#'     sites in the row
-#' @param row.metadata metadata for the sites; in long format, it will
-#'     be stored in each row with with the site pertaining to the data
-#' @param col.metadata metadata for the species; will be stored in
-#'     every 'n'th row, where 'n' is the number of rows in the
-#'     original table
-#' @param total.metadata metadata for table; will include publishing
-#'     information
-#' @importFrom reshape2 melt
-#' @return data set in long format, with all metadata included
 .matrix.melt <- function(x, study.metadata=data.frame(units=NA, other=NA),
                          site.metadata=data.frame(id=NA,year=NA,name=NA,lat=NA,long=NA,address=NA,area=NA,other=NA),
                          species.metadata=data.frame(species=NA, taxonomy=NA, other=NA)){
@@ -239,3 +225,19 @@ prog.bar <- function(x, y){
                 tryCatch(if(z[1] < 1) if((length(z) %% 10)==0) cat("|") else cat("."), error=function(z) cat("."))
         }
 }
+# This is testdat::santize_text; taken so that this package can be uploaded to CRAN
+#' @importFrom assertthat assert_that
+.sanitize.text <- function(input_text) {
+    assert_that(is.character(input_text))
+    sanitize.each.element <- function(elem) {
+        if (Encoding(elem) == "unknown")
+            enc <- "ASCII"
+        else
+            enc <- Encoding(elem)
+
+        iconv(elem, from=enc, to="ASCII", sub="")
+    }
+    input_text <- sapply(input_text, sanitize.each.element)
+    names(input_text) <- NULL
+    input_text
+}
diff --git a/man/nacdb.Rd → man/MADcomm.Rd b/man/nacdb.Rd → man/MADcomm.Rd