Skip to content

Commit

Permalink
Merge pull request #1 from alethere/newcalc
Browse files Browse the repository at this point in the history
Newcalc
  • Loading branch information
alethere authored Apr 1, 2022
2 parents 0bcda3e + 598ebaf commit ddfe161
Show file tree
Hide file tree
Showing 11 changed files with 838 additions and 134 deletions.
8 changes: 6 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
S3method(predict_IBD,list)
S3method(predict_IBD,matrix)
S3method(predict_IBD,numeric)
S3method(calc_IBD,numeric)
S3method(calc_IBD,matrix)
S3method(naive_IBD,numeric)
S3method(naive_IBD,matrix)
S3method(hmm_IBD,numeric)
S3method(hmm_IBD,matrix)
S3method(heur_IBD,numeric)
S3method(heur_IBD,matrix)
S3method(rec_count,matrix)
S3method(rec_count,list)
S3method(graphical_genotype,matrix)
Expand Down
539 changes: 513 additions & 26 deletions R/calc_IBD.R

Large diffs are not rendered by default.

25 changes: 12 additions & 13 deletions R/mapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,18 @@
#' @return Estimated linkage map, including position and other parameters.
#' @keywords internal
mdsmap <- function(linkdf,ndim = 2){
linkdf <- linkdf[order(linkdf$marker_a,linkdf$marker_b),]
linkdf <- linkdf[!duplicated(paste0(linkdf$marker_a,linkdf$marker_b)),]
text <- paste(c(length(unique(unlist(linkdf[,1:2]))),
apply(linkdf[,c(1,2,3,4)],1,paste,collapse = " ")),
collapse = "\n")
linkdf <- polymapR:::prepare_pwd(linkdf)
file <- paste0("tmp.",paste0(sample(c(letters,LETTERS),10),collapse = ""),".map")
write(x = text,file = file)
if(ndim == 2){
newmap <- MDSMap::calc.maps.pc(file,weightfn = "lod2")
}else if(ndim == 3){
newmap <- MDSMap::calc.maps.sphere(file,weightfn = "lod2",p= 200)
}
count <- length(unique(unlist(linkdf[,1:2])))
write(count,file = file)
write.table(linkdf, file = file, append = T,
col.names = F, row.names = F, quote = F)

newmap <- MDSMap::calc.maps.pc(file,weightfn = "lod2",ndim = ndim)
rem <- file.remove(file)
newmap <- newmap$locimap
colnames(newmap)[2] <- "marker"

return(newmap)
}

Expand Down Expand Up @@ -58,7 +57,7 @@ linkdf_shortcut <- function(geno,ploidy,p1name,p2name,ncores = 1){
ncores = ncores,
marker_assignment = ma,
LG_number = 1,verbose = F,
convert_palindrome_markers = F)$LG1
convert_palindrome_markers = T)$LG1
linkdf_p2 <- polymapR::finish_linkage_analysis(dosage_matrix = as.matrix(geno),
target_parent = p2name,
other_parent = p1name,
Expand All @@ -67,7 +66,7 @@ linkdf_shortcut <- function(geno,ploidy,p1name,p2name,ncores = 1){
ncores = ncores,
marker_assignment = ma,
LG_number = 1,verbose = F,
convert_palindrome_markers = F)$LG1
convert_palindrome_markers = T)$LG1

linkdf <- rbind(linkdf_p1,linkdf_p2)
return(linkdf)
Expand Down
2 changes: 1 addition & 1 deletion R/pred_IBD.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ predict_IBD <- function(IBD,map,interval = 10, method = "kosambi",
}
}else if(type[1] == "data.frame"){
predict_IBD(IBD,map,interval, method = method,
non_inf = non_inf,pred_points = pred_points)
non_inf = non_inf,pred_points = pred_points)
}

UseMethod("predict_IBD",IBD)
Expand Down
191 changes: 125 additions & 66 deletions R/smooth_wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@
#' IBD probability non-informative (if they fall within the threshold they will be
#' ignored during prediction). Defaults to 0.3 - 0.7. Symmetrical boundaries are
#' recommended but not necessary.
#' @param obs.method character, either "naive" or "heuristic" (or substrings). This parameter allows to
#' switch between using the IBD calculation (for observed IBDs) described in the Smooth Descent paper, or the
#' heuristic method from `polyqtlR`. However, our research has shown better results with the
#' naive method.
#' @param pred.method character, either "prediction" or "hmm" (or substrings). This parameter
#' allows to switch between using the IBD calculation (for predicted IBDs) between the weighted
#' average method or the Hidden Markov model implemented in `polyqtlR`. Our research shows better
#' results in polyploids with the HMM, although for high marker densities the
#' weighted average method is faster (specially if `prediction_points` is used)
#' @param verbose logical, should smooth descent report the steps it takes?
#'
#'
Expand Down Expand Up @@ -74,9 +83,12 @@ smooth_descent <- function(geno,
prediction_points = NULL,
error_threshold = 0.8,
non_inf = c(0.3,0.7),
verbose = T){
talk <- function(msg){
if(verbose) cat(msg)
verbose = T,
obs.method = "naive",
pred.method = "prediction",
hmm.error = 0.01){
talk <- function(...){
if(verbose) cat(...)
}
## INPUT CHECK ##
assertthat::assert_that(!is.null(rownames(geno)))
Expand Down Expand Up @@ -105,6 +117,11 @@ smooth_descent <- function(geno,
geno <- geno[map$marker,]
homologue <- homologue[map$marker,]

#Matching parameters
obs.method <- match.arg(obs.method,c("naive","heuristic"))
pred.method <- match.arg(pred.method,c("prediction","hmm"))
talk("Will estimate errors with",obs.method,"-",pred.method,"combination\n")


## Error estimation ##
if(is.null(p1name)) p1name <- colnames(geno)[1]
Expand All @@ -114,15 +131,24 @@ smooth_descent <- function(geno,
genocols <- which(colnames(geno) %in% c(p1name,p2name))

talk("Obtaining IBD\n")
#The parents should not be included in the IBD calculation
obsIBD <- calc_IBD(geno = as.matrix(geno[,-genocols]),
p1hom = as.matrix(homologue[,parentcols$p1]),
p2hom = as.matrix(homologue[,parentcols$p2]),
ploidy = ploidy)
obsIBD <- calc_IBD(geno = as.matrix(geno),
p1hom = as.matrix(homologue[,parentcols$p1]),
p2hom = as.matrix(homologue[,parentcols$p2]),
ploidy = ploidy,map = map, p1name = p1name, p2name = p2name,
method = obs.method)

talk("Predicting IBD\n")
predIBD <- predict_IBD(obsIBD, map, interval = prediction_interval,non_inf = non_inf,
pred_points = prediction_points)
if(pred.method == "hmm"){
predIBD <- calc_IBD(geno = as.matrix(geno),
p1hom = as.matrix(homologue[,parentcols$p1]),
p2hom = as.matrix(homologue[,parentcols$p2]),
ploidy = ploidy,map = map, p1name = p1name, p2name = p2name,
method = "hmm",hmm.error = hmm.error)
}else if(pred.method == "prediction"){
predIBD <- predict_IBD(obsIBD, map, interval = prediction_interval,non_inf = non_inf,
pred_points = prediction_points)
}

talk("Detecting errors\n")
errors <- lapply(1:length(obsIBD),function(i){
err <- abs(obsIBD[[i]] - predIBD[[i]]) > error_threshold
Expand All @@ -143,7 +169,9 @@ smooth_descent <- function(geno,
error = errors,
newIBD = obsIBD,
oldgeno = geno,
newgeno = geno)
newgeno = geno,
obs.method = obs.method,
pred.method = pred.method)

}else{
newIBD <- lapply(1:length(obsIBD),function(i){
Expand All @@ -164,18 +192,19 @@ smooth_descent <- function(geno,
new_geno <- new_geno[rownames(geno),colnames(geno[,-genocols])]
new_geno[is.na(new_geno)] <- geno[,-genocols][is.na(new_geno)]
new_geno <- cbind(geno[,genocols],new_geno)
newIBD <- calc_IBD(geno = as.matrix(new_geno[,-genocols]),
newIBD <- calc_IBD(geno = as.matrix(new_geno),
p1hom = as.matrix(homologue[,parentcols$p1]),
p2hom = as.matrix(homologue[,parentcols$p2]),
ploidy = ploidy)

ploidy = ploidy,method = obs.method, map = map)
res <- list(obsIBD = obsIBD,
predIBD = predIBD,
oldmap = map,
error = errors,
newIBD = newIBD,
oldgeno = geno,
newgeno = new_geno)
newgeno = new_geno,
obs.method = obs.method,
pred.method = pred.method)
}

#Diagnostics
Expand All @@ -190,7 +219,7 @@ smooth_descent <- function(geno,
pred = pred_rec,
new = new_rec)

res <- res[c("obsIBD","predIBD","error","newIBD","oldgeno","newgeno","oldmap","rec")]
res <- res[c("obsIBD","predIBD","error","newIBD","oldgeno","newgeno","oldmap","rec","obs.method","pred.method")]
return(res)
}

Expand Down Expand Up @@ -238,6 +267,15 @@ smooth_descent <- function(geno,
#' IBD probability non-informative (if they fall within the threshold they will be
#' ignored during prediction). Defaults to 0.3 - 0.7. Symmetrical boundaries are
#' recommended but not necessary.
#' @param obs.method character, either "naive" or "heuristic" (or substrings). This parameter allows to
#' switch between using the IBD calculation (for observed IBDs) described in the Smooth Descent paper, or the
#' heuristic method from `polyqtlR`. However, our research has shown better results with the
#' naive method.
#' @param pred.method character, either "prediction" or "hmm" (or substrings). This parameter
#' allows to switch between using the IBD calculation (for predicted IBDs) between the weighted
#' average method or the Hidden Markov model implemented in `polyqtlR`. Our research shows better
#' results in polyploids with the HMM, although for high marker densities the
#' weighted average method is faster (specially if `prediction_points` is used)
#' @param verbose logical, should smooth descent report the steps it takes?
#'
#' @return list containing the following items:
Expand Down Expand Up @@ -271,21 +309,24 @@ smooth_descent <- function(geno,
#' estimate_premap = F, mapping_ndim = 3, ncores = 1)
#' }
smooth_map <- function(geno,
homologue,
map = NULL,
ploidy = 2,
p1name = NULL,
p2name = NULL,
prediction_interval = 10,
prediction_threshold = 0.8,
prediction_points = NULL,
error_threshold = 0.8,
ncores = 1,
mapping_ndim = 2,
estimate_premap = F,
max_distance = 10,
non_inf = c(0.3,0.7),
verbose = T){
homologue,
map = NULL,
ploidy = 2,
p1name = NULL,
p2name = NULL,
prediction_interval = 10,
prediction_threshold = 0.8,
prediction_points = NULL,
error_threshold = 0.8,
ncores = 1,
mapping_ndim = 2,
estimate_premap = F,
max_distance = 10,
non_inf = c(0.3,0.7),
verbose = T,
obs.method = "naive",
pred.method = "prediction",
hmm.error = 0.01){
talk <- function(msg){
if(verbose) cat(msg)
}
Expand All @@ -301,8 +342,6 @@ smooth_map <- function(geno,
p1name = p1name, p2name = p2name,
ncores = ncores)
map <- mdsmap(linkdf,mapping_ndim)
map <- map$locimap
colnames(map)[2] <- "marker"
}

#We should eliminate markers that are distant from everything
Expand All @@ -318,12 +357,13 @@ smooth_map <- function(geno,
map <- map[!far_marks,]

res <- smooth_descent(geno = geno, homologue = homologue, map = map,
ploidy = ploidy, p1name = p1name, p2name = p2name,
prediction_interval = prediction_interval,
prediction_threshold = prediction_threshold,
prediction_points = prediction_points,
error_threshold = error_threshold,
verbose = verbose)
ploidy = ploidy, p1name = p1name, p2name = p2name,
prediction_interval = prediction_interval,
prediction_threshold = prediction_threshold,
prediction_points = prediction_points,
error_threshold = error_threshold,
verbose = verbose, obs.method = obs.method,
pred.method = pred.method,hmm.error = hmm.error)

talk("Estimating new linkage\n")
geno <- as.matrix(res$newgeno)
Expand All @@ -333,8 +373,6 @@ smooth_map <- function(geno,

talk("Mapping new genotypes\n")
new_map <- mdsmap(linkdf,mapping_ndim)
new_map <- new_map$locimap
colnames(new_map)[2] <- "marker"

#Diagnostics and output formatting
res$oldmap = map
Expand Down Expand Up @@ -393,6 +431,15 @@ smooth_map <- function(geno,
#' IBD probability non-informative (if they fall within the threshold they will be
#' ignored during prediction). Defaults to 0.3 - 0.7. Symmetrical boundaries are
#' recommended but not necessary.
#' @param obs.method character, either "naive" or "heuristic" (or substrings). This parameter allows to
#' switch between using the IBD calculation (for observed IBDs) described in the Smooth Descent paper, or the
#' heuristic method from `polyqtlR`. However, our research has shown better results with the
#' naive method.
#' @param pred.method character, either "prediction" or "hmm" (or substrings). This parameter
#' allows to switch between using the IBD calculation (for predicted IBDs) between the weighted
#' average method or the Hidden Markov model implemented in `polyqtlR`. Our research shows better
#' results in polyploids with the HMM, although for high marker densities the
#' weighted average method is faster (specially if `prediction_points` is used)
#' @param verbose logical, should smooth descent report the steps it takes?
#'
#' @return list of lists, where each element contains the following items:
Expand Down Expand Up @@ -442,7 +489,10 @@ smooth_map_iter <- function(geno,
estimate_premap = F,
max_distance = 10,
non_inf = c(0.3,0.7),
verbose = T){
verbose = T,
obs.method = "naive",
pred.method = "prediction",
hmm.error = 0.01){

talk <- function(msg){
if(verbose) cat(msg)
Expand All @@ -464,41 +514,50 @@ smooth_map_iter <- function(geno,
res <- list()
for(i in 1:iters){
talk(paste0("Iteration ",i," ----------\n"))
tic <- Sys.time()
if(i == 1){
this_iter <- try(smooth_map(geno = geno, homologue = homologue,
map = map, ploidy = ploidy, p1name = p1name,
p2name = p2name, prediction_interval = prediction_interval,
prediction_threshold = prediction_threshold,
error_threshold = err[i],
ncores = ncores,
mapping_ndim = mapping_ndim, estimate_premap = estimate_premap,
max_distance = max_distance, verbose = verbose,
prediction_points = prediction_points))
map = map, ploidy = ploidy, p1name = p1name,
p2name = p2name, prediction_interval = prediction_interval,
prediction_threshold = prediction_threshold,
error_threshold = err[i],
ncores = ncores,
mapping_ndim = mapping_ndim, estimate_premap = estimate_premap,
max_distance = max_distance, verbose = verbose,
prediction_points = prediction_points, obs.method = obs.method,
pred.method = pred.method,hmm.error = hmm.error))
}else{
#In the next iterations, a new starting map and new genotype is used
if(class(this_iter) == "try-error") next
if(class(this_iter) == "try-error"){
this_iter <- list()
next
}
m <- this_iter$newmap
g <- this_iter$newgeno
this_iter <- try(smooth_map(geno = g,
map = m,
homologue = homologue,
ploidy = ploidy,
p1name = p1name,
p2name = p2name,
prediction_interval = prediction_interval,
prediction_threshold = prediction_threshold,
prediction_points = prediction_points,
error_threshold = err[i],
ncores = ncores,
mapping_ndim = mapping_ndim,
max_distance = max_distance,
verbose = verbose))
map = m,
homologue = homologue,
ploidy = ploidy,
p1name = p1name,
p2name = p2name,
prediction_interval = prediction_interval,
prediction_threshold = prediction_threshold,
prediction_points = prediction_points,
error_threshold = err[i],
ncores = ncores,
mapping_ndim = mapping_ndim,
max_distance = max_distance,
verbose = verbose,
obs.method = obs.method,
pred.method = pred.method,
hmm.error = hmm.error))
}
toc <- Sys.time()
this_iter$error_threshold <- err[i]
this_iter$time <- difftime(toc,tic, units = "secs")
res[[i]] <- this_iter
res[[i]]$error_threshold <- err[i]
}

names(res) <- paste0("iter",1:length(res))
return(res)
}

Loading

0 comments on commit ddfe161

Please sign in to comment.