From c8633bd697381f52e9e72a13ff9ce45949dbd8d7 Mon Sep 17 00:00:00 2001 From: Kritika Verma <95202116+SunSummoner@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:10:23 +0530 Subject: [PATCH 1/4] Made changes in assign_job_queue.R, cleanup.R, lineage.R, msa.R, tree.R --- R/assign_job_queue.R | 4 +++- R/cleanup.R | 4 ++-- R/globals.R | 4 ++++ R/lineage.R | 8 ++++---- R/msa.R | 2 +- R/tree.R | 6 +++--- 6 files changed, 17 insertions(+), 11 deletions(-) create mode 100644 R/globals.R diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R index bc5253d4..033482c2 100644 --- a/R/assign_job_queue.R +++ b/R/assign_job_queue.R @@ -50,6 +50,8 @@ map_advanced_opts2procs <- function(advanced_opts) { #' #' @importFrom dplyr across everything select summarise #' +#' @importFrom stats median +#' #' @return [list] names: processes; values: median runtime (seconds) #' #' see molevol_scripts/R/metrics.R for info on functions called here @@ -126,7 +128,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) { names_to = "process", values_to = "median_seconds" ) |> - dplyr::arrange(dplyr::desc(median_seconds)) + dplyr::arrange(dplyr::desc(.data$median_seconds)) readr::write_tsv(df_proc_medians, file = filepath) return(df_proc_medians) } diff --git a/R/cleanup.R b/R/cleanup.R index 4fe074ee..6716d245 100755 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -697,7 +697,7 @@ selectLongestDuplicate <- function(prot, column) { # Get list of duplicates dups <- prot %>% - group_by(AccNum) %>% + group_by(.data$AccNum) %>% summarize("count" = n()) %>% filter(count > 1) %>% arrange(-count) %>% @@ -708,7 +708,7 @@ selectLongestDuplicate <- function(prot, column) { longest_rows <- c() remove_rows <- c() for (acc in dup_acc) { - dup_rows <- dups %>% filter(AccNum == acc) + dup_rows <- dups %>% filter(.data$AccNum == acc) longest <- dup_rows[which(nchar(pull(dup_rows, {{ col }})) == max(nchar(pull(dup_rows, {{ col }}))))[1], "row.orig"] diff --git a/R/globals.R b/R/globals.R new file mode 100644 index 00000000..76da9d5d --- /dev/null +++ b/R/globals.R @@ -0,0 +1,4 @@ +# File to put all data.table variables that aren't defined otherwise + +#' @importFrom utils globalVariables +utils::globalVariables(c("Protein")) \ No newline at end of file diff --git a/R/lineage.R b/R/lineage.R index d14246d7..a461acc2 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -335,10 +335,10 @@ IPG2Lineage <- function(accessions, ipg_file, { # browser() acc <- accessions[i] - acc_inds <- which(mergedTax$Protein == acc) + acc_inds <- which(.data$mergedTax$Protein == acc) if (length(acc_inds) != 0) { # refseq inds take precedence - refseq_inds <- acc_inds[which(mergedTax[acc_inds, ]$Source == "RefSeq")] + refseq_inds <- acc_inds[which(.data$mergedTax[acc_inds, ]$Source == "RefSeq")] if (length(refseq_inds) != 0) { # Take the first first row of the refseq (smallest index) refseq_rows[i] <- refseq_inds[1] @@ -364,7 +364,7 @@ IPG2Lineage <- function(accessions, ipg_file, } if (length(genbank_rows) != 0) { genbank_ipg_dt <- ipg_dt[genbank_rows, ] - genbank_lins <- GCA2Lineage(gca_ipg_dt, + genbank_lins <- GCA2Lineage(.data$gca_ipg_dt, assembly_path = genbank_assembly_path, lineagelookup_path ) @@ -372,7 +372,7 @@ IPG2Lineage <- function(accessions, ipg_file, lins <- GCA2Lineage(prot_data = ipg_dt, assembly_path, lineagelookup_path) - lins <- lins[!is.na(Lineage)] %>% unique() + lins <- lins[!is.na(.data$Lineage)] %>% unique() return(lins) } diff --git a/R/msa.R b/R/msa.R index e56cc32c..6d0e90c1 100644 --- a/R/msa.R +++ b/R/msa.R @@ -191,7 +191,7 @@ msa_pdf <- function(fasta_path, out_path = NULL, #' @param outfile #' #' @importFrom Biostrings readAAStringSet -#' +#' @importFrom stats kalign #' @return #' @export #' diff --git a/R/tree.R b/R/tree.R index 5cdc20d1..8e736c98 100755 --- a/R/tree.R +++ b/R/tree.R @@ -55,15 +55,15 @@ #' here("src/FastTree") #' } convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"), - tre_path = here("data/alns/pspa_snf7.tre"), + .data$tre_path = here("data/alns/pspa_snf7.tre"), fasttree_path = here("src/FastTree")) { # fa_path=here("data/alns/pspa_snf7.fa") - # tre_path=here("data/alns/pspa_snf7.tre") + # .data$tre_path=here("data/alns/pspa_snf7.tre") # fasttree_path=here("src/FastTree") print(fa_path) system2( command = fasttree_path, - args = paste(c(fa_path, ">", tre_path), + args = paste(c(fa_path, ">", .data$tre_path), sep = "", collapse = " " ) ) From d1bedabec56fe2e2ca08cb5f74a857a5d51ba3b2 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Thu, 31 Oct 2024 09:44:39 -0600 Subject: [PATCH 2/4] remove .data prefix from param --- R/tree.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/tree.R b/R/tree.R index ba490103..5679de34 100755 --- a/R/tree.R +++ b/R/tree.R @@ -57,11 +57,8 @@ #' here("src/FastTree") #' } convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"), - .data$tre_path = here("data/alns/pspa_snf7.tre"), + tre_path = here("data/alns/pspa_snf7.tre"), fasttree_path = here("src/FastTree")) { - # fa_path=here("data/alns/pspa_snf7.fa") - # .data$tre_path=here("data/alns/pspa_snf7.tre") - # fasttree_path=here("src/FastTree") # Check if the FASTA file exists if (!file.exists(fa_path)) { From 1a68176b9e765006391af63b4a0c29a62d6888cd Mon Sep 17 00:00:00 2001 From: David Mayer Date: Thu, 31 Oct 2024 09:45:11 -0600 Subject: [PATCH 3/4] kalign seems to come from an external GitHub repo -- removing from namespace --- NAMESPACE | 2 ++ R/msa.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 6ae464f6..3a7b5c03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -247,6 +247,7 @@ importFrom(shiny,showNotification) importFrom(stats,as.formula) importFrom(stats,complete.cases) importFrom(stats,logLik) +importFrom(stats,median) importFrom(stats,na.omit) importFrom(stringi,stri_extract_all_regex) importFrom(stringi,stri_replace_all_regex) @@ -276,6 +277,7 @@ importFrom(tidyr,replace_na) importFrom(tidyr,separate) importFrom(tidyr,unite) importFrom(utils,combn) +importFrom(utils,globalVariables) importFrom(viridis,scale_fill_viridis) importFrom(visNetwork,visEdges) importFrom(visNetwork,visGroups) diff --git a/R/msa.R b/R/msa.R index 6919f098..f0ee4b07 100644 --- a/R/msa.R +++ b/R/msa.R @@ -196,7 +196,7 @@ createMSA_PDF <- function(fasta_path, out_path = NULL, #' will be saved. #' #' @importFrom Biostrings readAAStringSet -#' @importFrom stats kalign +#' #' @return A list containing the alignment object and the output file path. #' @export #' From 1ba4da67b1ce2cd65f80f5222a6ada6101a5e521 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Fri, 1 Nov 2024 10:49:37 -0600 Subject: [PATCH 4/4] revert .data additions --- R/lineage.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/lineage.R b/R/lineage.R index 49ffd08d..fa113654 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -395,10 +395,10 @@ IPG2Lineage <- function(accessions, ipg_file, { # browser() acc <- accessions[i] - acc_inds <- which(.data$mergedTax$Protein == acc) + acc_inds <- which(mergedTax$Protein == acc) if (length(acc_inds) != 0) { # refseq inds take precedence - refseq_inds <- acc_inds[which(.data$mergedTax[acc_inds, ]$Source == "RefSeq")] + refseq_inds <- acc_inds[which(mergedTax[acc_inds, ]$Source == "RefSeq")] if (length(refseq_inds) != 0) { # Take the first first row of the refseq (smallest index) refseq_rows[i] <- refseq_inds[1] @@ -424,7 +424,7 @@ IPG2Lineage <- function(accessions, ipg_file, } if (length(genbank_rows) != 0) { genbank_ipg_dt <- ipg_dt[genbank_rows, ] - genbank_lins <- GCA2Lineage(.data$gca_ipg_dt, + genbank_lins <- GCA2Lineage(gca_ipg_dt, assembly_path = genbank_assembly_path, lineagelookup_path )