Skip to content

Commit

Permalink
Merge cran_1-0-0
Browse files Browse the repository at this point in the history
Merge branch 'cran_1-0-0'

Conflicts:
	README.md
  • Loading branch information
pbastide committed Jan 31, 2017
2 parents d2c0968 + 0d8b71c commit 2a688ca
Show file tree
Hide file tree
Showing 44 changed files with 557 additions and 216 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
^\.travis\.yml$
^_pkgdown\.yml$
^docs$
^data-raw$
^README\.md$
^cran-comments\.md$
11 changes: 7 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
Package: PhylogeneticEM
Title: Automatic Shift Detection using a Phylogenetic EM
Version: 0.0.0.9000
Version: 1.0.0
Authors@R: c(
person("Paul", "Bastide", email = "paul.bastide@agroparistech.fr", role = c("aut", "cre")),
person("Paul", "Bastide", email = "paul.bastide@m4x.org", role = c("aut", "cre")),
person("Mahendra", "Mariadassou", role = "ctb"))
Description: What the package does (one paragraph).
Description:
Implementation of the automatic shift detection method for
Brownian Motion (BM) or Ornstein–Uhlenbeck (OU) models of trait evolution on
phylogenies. Some tools to handle equivalent shifts configurations are also
available.
Depends:
ape (>= 3.5),
Matrix (>= 1.2.3),
Expand All @@ -21,7 +25,6 @@ Imports:
methods (>= 3.2.1),
plyr (>= 1.8.4),
Rcpp (>= 0.12.6),
RcppArmadillo (>= 0.5.500.2.0),
robustbase (>= 0.92.6),
stats (>= 3.2.1),
utils (>= 3.2.1)
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ export(shifts_to_simmap)
export(simul_process)
export(transform_branch_length)
import(Matrix)
import(RcppArmadillo)
import(ape)
import(methods)
importFrom(Rcpp,evalCpp)
Expand Down
2 changes: 1 addition & 1 deletion R/E_step.R
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,7 @@ compute_log_likelihood.simple.nomissing.BM <- function(phylo, Y_data, sim,

#' @useDynLib PhylogeneticEM
#' @importFrom Rcpp evalCpp
#' @import RcppArmadillo
# @import RcppArmadillo

compute_E.upward_downward <- function(phylo,
Y_data,
Expand Down
20 changes: 20 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#' New World Monkeys dataset
#'
#' Morphometric dataset and phylogeny for brain shape variation of 50 species of
#' New World monkeys (platyrrhine).
#'
#' @format A list containing two objects:
#' \describe{
#' \item{phy}{The Phylogenetic tree for the platyrrhine species, pruned to match
#' the species in the morphometric dataset}
#' \item{dat}{First two PC scores from a PCA of the species-averaged Procrustes
#' coordinates}
#' }
#'
#' @references
#' Aristide, L., dos Reis, S. F., Machado, A. C., Lima, I., Lopes, R. T.
#' & Perez, S. I. (2016). Brain shape convergence in the adaptive radiation of New
#' World monkeys. Proceedings of the National Academy of Sciences, 113(8), 2158–2163.
#' http://doi.org/10.1073/pnas.1514473113
#'
"monkeys"
145 changes: 95 additions & 50 deletions R/estimateEM.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,14 @@ NULL
#' \item{normalized_half_life}{default to 10^(-2)}
#' \item{log_likelihood}{default to 10^(-2)}
#' }
#' @param Nbr_It_Max the maximal number of iterations of the EM allowed. Default to
#' @param Nbr_It_Max the maximal number of iterations of the EM allowed. Default to
#' 500 iterations.
#' @param nbr_of_shifts the number of shifts allowed.
#' @param alpha_known is the selection strength assumed to be known ?
#' Default to FALSE.
#' @param eps tolerance on the selection strength value before switching to a BM.
#' Default to 10^(-3).
#' @param known.selection.strength if \code{alpha_known=TRUE}, the value of the
#' @param known.selection.strength if \code{alpha_known=TRUE}, the value of the
#' known selection strength.
#' @param init.selection.strength (optional) a starting point for the selection
#' strength value.
Expand Down Expand Up @@ -149,19 +149,23 @@ NULL
#' function \code{\link{enumerate_tips_under_edges}}.
#' @param T_tree (optional) matrix of incidence of the tree, result of function
#' \code{\link{incidence.matrix}}.
#' @param U_tree (optional) full matrix of incidence of the tree, result of function
#' @param U_tree (optional) full matrix of incidence of the tree, result of function
#' \code{\link{incidence.matrix.full}}.
#' @param h_tree (optional) total height of the tree.
#' @param F_moments (optional, internal)
#' @param tol_half_life should the tolerance criterion be applied to the phylogenetic
#' half life (TRUE, default) or to the raw selection strength ?
#' @param tol_half_life should the tolerance criterion be applied to the
#' phylogenetic half life (TRUE, default) or to the raw selection strength ?
#' @param warning_several_solutions wether to issue a warning if several equivalent
#' solutions are found (default to TRUE).
#' @param convergence_mode one of "relative" (the default) or "absolute". Should the
#' tolerance be applied to the raw parameters, or to the renormalized ones ?
#' @param check_convergence_likelihood should the likelihood be taken into
#' consideration for convergence assesment ? (default to TRUE).
#'
#' @param method.OUsun Method to be used in univariate OU. One of "rescale"
#' (rescale the tree to fit a BM) or "raw" (directly use an OU, only available for
#' univariate processes).
#' @param sBM_variance Is the root of the BM supposed to be random and
#' "stationnary"? Used for BM equivalent computations. Default to FALSE.
#'
#' @return
#' An object of class \code{EstimateEM}.
Expand Down Expand Up @@ -890,8 +894,8 @@ estimateEM <- function(phylo,
#' Need to be positive. Default to 0.1.
#' @param C.BM2 Multiplying constant to be used for the BigeMassart2 method.
#' Default to 2.5.
#' @param C.LINselect Multiplying constant to be used for the LINselect method. Need to be
#' greater than 1. Default to 1.1.
#' @param C.LINselect Multiplying constant to be used for the LINselect method.
#' Need to be greater than 1. Default to 1.1.
#' @param method.variance Algorithm to be used for the moments computations at the
#' E step. One of "simple" for the naive method; of "upward_downward" for the
#' Upward Downward method (usually faster). Default to "upward_downward".
Expand All @@ -917,6 +921,8 @@ estimateEM <- function(phylo,
#' multivariate traits. OU with univariate traits can take both TRUE or FALSE. If
#' TRUE, a grid based on the branch length of the tree is automatically computed,
#' using function \code{\link{find_grid_alpha}}.
#' @param nbr_alpha If \code{alpha_grid=TRUE}, the number of alpha values on the
#' grid. Default to 10.
#' @param random.root Wether the root is assumed to be random (TRUE) of fixed
#' (FALSE). Default to TRUE
#' @param stationary.root Wether the root is assumed to be in the stationnary
Expand All @@ -930,13 +936,14 @@ estimateEM <- function(phylo,
#' @param estimates The result of a previous run of this same function. This
#' function can be re-run for other model election method. Default to NULL.
#' @param save_step If alpha_grid=TRUE, wether to save the intermediate results
#' for each value of alpha. Useful for long computations. Default to FALSE.
#' @param sBM_variance DEPRECATED. Used for BM equivalent computations.
#' for each value of alpha (in a temporary file). Useful for long computations.
#' Default to FALSE.
#' @param method.OUsun DEPRECATED. Method to be used in univariate OU.
#' @param parallel_alpha EXPERIMENTAL If alpha_grid=TRUE, wether to run the
# @param sBM_variance DEPRECATED. Used for BM equivalent computations.
# Default to FALSE.
# @param method.OUsun DEPRECATED. Method to be used in univariate OU.
#' @param parallel_alpha If alpha_grid=TRUE, wether to run the
#' estimations with different values of alpha on separate cores. Default to
#' FALSE.
#' FALSE. If TRUE, the log is written as a temporary file.
#' @param Ncores If parallel_alpha=TRUE, number of cores to be used.
# @param exportFunctions DEPRECATED. TO BE REMOVED.
#' @param impute_init_Rphylopars Wether to use
Expand All @@ -960,6 +967,33 @@ estimateEM <- function(phylo,
#' @seealso \code{\link{plot.PhyloEM}}, \code{\link{params_process.PhyloEM}},
#' \code{\link{imputed_traits.PhyloEM}}
#'
#' @examples
#' \dontrun{
#' ## Load Data
#' data(monkeys)
#' ## Run method
#' # Note: use more alpha values for better results.
#' res <- PhyloEM(Y_data = monkeys$dat, ## data
#' phylo = monkeys$phy, ## phylogeny
#' process = "scOU", ## scalar OU
#' random.root = TRUE, ## root is stationary
#' stationary.root = TRUE,
#' K_max = 10, ## maximal number of shifts
#' nbr_alpha = 4, ## number of alpha values
#' parallel_alpha = TRUE, ## parallelize on alpha values
#' Ncores = 2)
#' ## Plot selected solution (LINselect)
#' plot(res) # three shifts
#' ## Plot selected solution (DDSE)
#' plot(res, method.selection = "DDSE") # no shift
#' ## Extract and solution with 5 shifts
#' params_5 <- params_process(res, K = 5)
#' plot(res, params = params_5)
#' ## Show all equivalent solutions
#' eq_sol <- equivalent_shifts(monkeys$phy, params_5)
#' plot(eq_sol)
#' }
#'
#' @export
#'
##
Expand Down Expand Up @@ -998,15 +1032,16 @@ PhyloEM <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "rBM"),
method.init.alpha.estimation = c("regression", "regression.MM", "median"),
methods.segmentation = c("lasso", "best_single_move"),
alpha_grid = TRUE,
nbr_alpha = 10,
random.root = TRUE,
stationary.root = TRUE,
alpha = NULL,
check.tips.names = TRUE,
progress.bar = TRUE,
estimates = NULL,
save_step = FALSE,
sBM_variance = FALSE,
method.OUsun = "rescale",
# sBM_variance = FALSE,
#method.OUsun = "rescale",
parallel_alpha = FALSE,
Ncores = 3,
# exportFunctions = ls(),
Expand Down Expand Up @@ -1034,6 +1069,7 @@ PhyloEM <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "rBM"),
p <- nrow(Y_data)
ntaxa <- length(phylo$tip.label)
## Independent traits #######################################################
method.OUsun = "rescale"
if (p == 1) {
method.OUsun = "raw"
independent = TRUE
Expand All @@ -1042,7 +1078,7 @@ PhyloEM <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "rBM"),
## Adaptations to the BM ####################################################
if (process == "BM"){
if (independent){
warning("The independent option is not available for the BM. The traits are supposed to be correlated.")
if (p > 1) warning("The independent option is not available for the BM. The traits are supposed to be correlated.")
independent <- FALSE
}
alpha_grid <- TRUE
Expand Down Expand Up @@ -1110,11 +1146,12 @@ PhyloEM <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "rBM"),
random.root = random.root,
stationary.root = stationary.root,
alpha = alpha,
nbr_alpha = nbr_alpha,
check.tips.names = check.tips.names,
progress.bar = progress.bar,
estimates = estimates,
save_step = save_step,
sBM_variance = sBM_variance,
# sBM_variance = sBM_variance,
method.OUsun = method.OUsun,
parallel_alpha = parallel_alpha,
Ncores = Ncores,
Expand Down Expand Up @@ -1640,17 +1677,23 @@ PhyloEM_grid_alpha <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "r
independent = FALSE,
K_max, use_previous = TRUE,
order = TRUE,
method.selection = c("BirgeMassart1", "BirgeMassart2", "BGHuni", "pBIC", "pBIC_l1ou", "BGHlsq", "BGHml", "BGHlsqraw", "BGHmlraw"),
method.selection = c("BirgeMassart1", "BirgeMassart2",
"BGHuni", "pBIC", "pBIC_l1ou",
"BGHlsq", "BGHml",
"BGHlsqraw", "BGHmlraw"),
C.BM1 = 0.1, C.BM2 = 2.5, C.LINselect = 1.1,
method.variance = "simple",
method.init = "default",
method.init.alpha = "default",
method.init.alpha.estimation = c("regression", "regression.MM", "median"),
method.init.alpha.estimation = c("regression",
"regression.MM",
"median"),
methods.segmentation = c("lasso", "best_single_move"),
alpha_known = TRUE,
random.root = FALSE,
stationary.root = FALSE,
alpha = NULL,
nbr_alpha = nbr_alpha,
check.tips.names = FALSE,
progress.bar = TRUE,
estimates = NULL,
Expand Down Expand Up @@ -1695,7 +1738,7 @@ PhyloEM_grid_alpha <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "r
if (process == "BM") {
alpha <- 0
} else {
alpha <- find_grid_alpha(phylo, alpha, ...)
alpha <- find_grid_alpha(phylo, alpha, nbr_alpha = nbr_alpha, ...)
if (stationary.root) alpha <- alpha[alpha != 0]
}
## Loop on alpha
Expand Down Expand Up @@ -1916,6 +1959,7 @@ PhyloEM_grid_alpha <- function(phylo, Y_data, process = c("BM", "OU", "scOU", "r
call. = FALSE)
}
cl <- parallel::makeCluster(Ncores, outfile = "")
# outfile = tempfile(pattern = "log_file_dopar_"))
doParallel::registerDoParallel(cl)
X <- foreach::foreach(a_greek = alpha, .packages = reqpckg) %dopar%
{
Expand Down Expand Up @@ -2270,7 +2314,9 @@ Phylo_EM_sequencial <- function(phylo, Y_data,
}
## Format results and return
res <- format_output_several_K_single(XX, light_result)
if (save_step) save(res, file = paste0("Tmp_", alp, "_", format(Sys.time(), "%Y-%m-%d_%H-%M-%S")))
if (save_step) save(res,
file = tempfile(pattern = paste0("Alpha=", alp, "_"),
fileext = c(".RData")))
return(res)
}

Expand Down Expand Up @@ -2788,11 +2834,11 @@ format_output <- function(results_estim_EM, phylo, time = NA){
return(X)
}

format_output_several_K <- function(res_sev_K, out, alpha = "estimated"){
alpha <- paste0("alpha_", alpha)
out[[alpha]] <- format_output_several_K_single(res_sev_K)
return(out)
}
# format_output_several_K <- function(res_sev_K, out, alpha = "estimated"){
# alpha <- paste0("alpha_", alpha)
# out[[alpha]] <- format_output_several_K_single(res_sev_K)
# return(out)
# }

format_output_several_K_single <- function(res_sev_K, light_result = TRUE){
dd <- do.call(rbind, res_sev_K)
Expand Down Expand Up @@ -2831,30 +2877,29 @@ format_output_several_K_single <- function(res_sev_K, light_result = TRUE){
return(res)
}

##
#' @title Maximal number of shifts allowed
#'
#' @description
#' \code{compute_K_max} computes the quantity
#' min(floor(kappa * ntaxa / (2 + log(2) + log(ntaxa))), ntaxa - 7))
#' that is the maximal dimention allowed to get theoretical garenties during
#' the selection model, when using the procedure defined by Baraud et al (2009)
#'
#' @details
#' See Baraud et al (2009)
#'
#' @param ntaxa the number of tips
#' @param kappa a real strictly bellow 1.
#'
#' @return K_max the maximal number of shifts allowed.
#'
#' @keywords internal
#'
##
compute_K_max <- function(ntaxa, kappa = 0.9){
if (kappa >= 1) stop("For K_max computation, one must have kappa < 1")
return(min(floor(kappa * ntaxa / (2 + log(2) + log(ntaxa))), ntaxa - 7))
}
# ##@title Maximal number of shifts allowed
#
# @description
# \code{compute_K_max} computes the quantity
# min(floor(kappa * ntaxa / (2 + log(2) + log(ntaxa))), ntaxa - 7))
# that is the maximal dimention allowed to get theoretical garenties during
# the selection model, when using the procedure defined by Baraud et al (2009)
#
# @details
# See Baraud et al (2009)
#
# @param ntaxa the number of tips
# @param kappa a real strictly bellow 1.
#
# @return K_max the maximal number of shifts allowed.
#
# @keywords internal
#
# #
# compute_K_max <- function(ntaxa, kappa = 0.9){
# if (kappa >= 1) stop("For K_max computation, one must have kappa < 1")
# return(min(floor(kappa * ntaxa / (2 + log(2) + log(ntaxa))), ntaxa - 7))
# }

expand_method_selection <- function(method.selection){
if ("Djump" %in% method.selection){
Expand Down
5 changes: 3 additions & 2 deletions R/generic_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,9 @@ check_dimensions.shifts <- function(p, shifts){
##
find_grid_alpha <- function(phy, alpha = NULL,
nbr_alpha = 10,
factor_up_alpha = 2, factor_down_alpha = 3,
quantile_low_distance = 0.05,
factor_up_alpha = 2,
factor_down_alpha = 3,
quantile_low_distance = 0.0001,
log_transform = TRUE, ...){
if (!is.null(alpha)) return(alpha)
dtips <- cophenetic(phy)
Expand Down
Loading

0 comments on commit 2a688ca

Please sign in to comment.