From c8633bd697381f52e9e72a13ff9ce45949dbd8d7 Mon Sep 17 00:00:00 2001
From: Kritika Verma <95202116+SunSummoner@users.noreply.github.com>
Date: Sun, 20 Oct 2024 16:10:23 +0530
Subject: [PATCH 1/4] Made changes in assign_job_queue.R, cleanup.R, lineage.R,
 msa.R, tree.R

---
 R/assign_job_queue.R | 4 +++-
 R/cleanup.R          | 4 ++--
 R/globals.R          | 4 ++++
 R/lineage.R          | 8 ++++----
 R/msa.R              | 2 +-
 R/tree.R             | 6 +++---
 6 files changed, 17 insertions(+), 11 deletions(-)
 create mode 100644 R/globals.R

diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index bc5253d4..033482c2 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -50,6 +50,8 @@ map_advanced_opts2procs <- function(advanced_opts) {
 #'
 #' @importFrom dplyr across everything select summarise
 #'
+#' @importFrom stats median
+#' 
 #' @return [list] names: processes; values: median runtime (seconds)
 #'
 #' see molevol_scripts/R/metrics.R for info on functions called here
@@ -126,7 +128,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
             names_to = "process",
             values_to = "median_seconds"
         ) |>
-        dplyr::arrange(dplyr::desc(median_seconds))
+        dplyr::arrange(dplyr::desc(.data$median_seconds))
     readr::write_tsv(df_proc_medians, file = filepath)
     return(df_proc_medians)
 }
diff --git a/R/cleanup.R b/R/cleanup.R
index 4fe074ee..6716d245 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -697,7 +697,7 @@ selectLongestDuplicate <- function(prot, column) {
 
     # Get list of duplicates
     dups <- prot %>%
-        group_by(AccNum) %>%
+        group_by(.data$AccNum) %>%
         summarize("count" = n()) %>%
         filter(count > 1) %>%
         arrange(-count) %>%
@@ -708,7 +708,7 @@ selectLongestDuplicate <- function(prot, column) {
     longest_rows <- c()
     remove_rows <- c()
     for (acc in dup_acc) {
-        dup_rows <- dups %>% filter(AccNum == acc)
+        dup_rows <- dups %>% filter(.data$AccNum == acc)
 
         longest <- dup_rows[which(nchar(pull(dup_rows, {{ col }})) == max(nchar(pull(dup_rows, {{ col }}))))[1], "row.orig"]
 
diff --git a/R/globals.R b/R/globals.R
new file mode 100644
index 00000000..76da9d5d
--- /dev/null
+++ b/R/globals.R
@@ -0,0 +1,4 @@
+# Declares names used via non-standard evaluation (e.g. data.table columns) so R CMD check does not flag them as undefined globals
+
+#' @importFrom utils globalVariables
+utils::globalVariables(c("Protein"))
\ No newline at end of file
diff --git a/R/lineage.R b/R/lineage.R
index d14246d7..a461acc2 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -335,10 +335,10 @@ IPG2Lineage <- function(accessions, ipg_file,
     {
         # browser()
         acc <- accessions[i]
-        acc_inds <- which(mergedTax$Protein == acc)
+        acc_inds <- which(.data$mergedTax$Protein == acc)
         if (length(acc_inds) != 0) {
             # refseq inds take precedence
-            refseq_inds <- acc_inds[which(mergedTax[acc_inds, ]$Source == "RefSeq")]
+            refseq_inds <- acc_inds[which(.data$mergedTax[acc_inds, ]$Source == "RefSeq")]
             if (length(refseq_inds) != 0) {
                 # Take the first first row of the refseq (smallest index)
                 refseq_rows[i] <- refseq_inds[1]
@@ -364,7 +364,7 @@ IPG2Lineage <- function(accessions, ipg_file,
     }
     if (length(genbank_rows) != 0) {
         genbank_ipg_dt <- ipg_dt[genbank_rows, ]
-        genbank_lins <- GCA2Lineage(gca_ipg_dt,
+        genbank_lins <- GCA2Lineage(.data$gca_ipg_dt,
             assembly_path = genbank_assembly_path,
             lineagelookup_path
         )
@@ -372,7 +372,7 @@ IPG2Lineage <- function(accessions, ipg_file,
 
 
     lins <- GCA2Lineage(prot_data = ipg_dt, assembly_path, lineagelookup_path)
-    lins <- lins[!is.na(Lineage)] %>% unique()
+    lins <- lins[!is.na(.data$Lineage)] %>% unique()
 
     return(lins)
 }
diff --git a/R/msa.R b/R/msa.R
index e56cc32c..6d0e90c1 100644
--- a/R/msa.R
+++ b/R/msa.R
@@ -191,7 +191,7 @@ msa_pdf <- function(fasta_path, out_path = NULL,
 #' @param outfile
 #'
 #' @importFrom Biostrings readAAStringSet
-#'
+#' @importFrom stats kalign
 #' @return
 #' @export
 #'
diff --git a/R/tree.R b/R/tree.R
index 5cdc20d1..8e736c98 100755
--- a/R/tree.R
+++ b/R/tree.R
@@ -55,15 +55,15 @@
 #'                 here("src/FastTree")
 #' }
 convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
-    tre_path = here("data/alns/pspa_snf7.tre"),
+    .data$tre_path = here("data/alns/pspa_snf7.tre"),
     fasttree_path = here("src/FastTree")) {
     # fa_path=here("data/alns/pspa_snf7.fa")
-    # tre_path=here("data/alns/pspa_snf7.tre")
+    # .data$tre_path=here("data/alns/pspa_snf7.tre")
     # fasttree_path=here("src/FastTree")
     print(fa_path)
     system2(
         command = fasttree_path,
-        args = paste(c(fa_path, ">", tre_path),
+        args = paste(c(fa_path, ">", .data$tre_path),
             sep = "", collapse = " "
         )
     )

From d1bedabec56fe2e2ca08cb5f74a857a5d51ba3b2 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Thu, 31 Oct 2024 09:44:39 -0600
Subject: [PATCH 2/4] remove .data prefix from param

---
 R/tree.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/R/tree.R b/R/tree.R
index ba490103..5679de34 100755
--- a/R/tree.R
+++ b/R/tree.R
@@ -57,11 +57,8 @@
 #'                 here("src/FastTree")
 #' }
 convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
-    .data$tre_path = here("data/alns/pspa_snf7.tre"),
+    tre_path = here("data/alns/pspa_snf7.tre"),
     fasttree_path = here("src/FastTree")) {
-    # fa_path=here("data/alns/pspa_snf7.fa")
-    # .data$tre_path=here("data/alns/pspa_snf7.tre")
-    # fasttree_path=here("src/FastTree")
     
     # Check if the FASTA file exists
     if (!file.exists(fa_path)) {

From 1a68176b9e765006391af63b4a0c29a62d6888cd Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Thu, 31 Oct 2024 09:45:11 -0600
Subject: [PATCH 3/4] kalign seems to come from an external GitHub repo --
 removing from namespace

---
 NAMESPACE | 2 ++
 R/msa.R   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 6ae464f6..3a7b5c03 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -247,6 +247,7 @@ importFrom(shiny,showNotification)
 importFrom(stats,as.formula)
 importFrom(stats,complete.cases)
 importFrom(stats,logLik)
+importFrom(stats,median)
 importFrom(stats,na.omit)
 importFrom(stringi,stri_extract_all_regex)
 importFrom(stringi,stri_replace_all_regex)
@@ -276,6 +277,7 @@ importFrom(tidyr,replace_na)
 importFrom(tidyr,separate)
 importFrom(tidyr,unite)
 importFrom(utils,combn)
+importFrom(utils,globalVariables)
 importFrom(viridis,scale_fill_viridis)
 importFrom(visNetwork,visEdges)
 importFrom(visNetwork,visGroups)
diff --git a/R/msa.R b/R/msa.R
index 6919f098..f0ee4b07 100644
--- a/R/msa.R
+++ b/R/msa.R
@@ -196,7 +196,7 @@ createMSA_PDF <- function(fasta_path, out_path = NULL,
 #' will be saved.
 #'
 #' @importFrom Biostrings readAAStringSet
-#' @importFrom stats kalign
+#' 
 #' @return A list containing the alignment object and the output file path.
 #' @export
 #'

From 1ba4da67b1ce2cd65f80f5222a6ada6101a5e521 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Fri, 1 Nov 2024 10:49:37 -0600
Subject: [PATCH 4/4] revert .data additions

---
 R/lineage.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/lineage.R b/R/lineage.R
index 49ffd08d..fa113654 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -395,10 +395,10 @@ IPG2Lineage <- function(accessions, ipg_file,
     {
         # browser()
         acc <- accessions[i]
-        acc_inds <- which(.data$mergedTax$Protein == acc)
+        acc_inds <- which(mergedTax$Protein == acc)
         if (length(acc_inds) != 0) {
             # refseq inds take precedence
-            refseq_inds <- acc_inds[which(.data$mergedTax[acc_inds, ]$Source == "RefSeq")]
+            refseq_inds <- acc_inds[which(mergedTax[acc_inds, ]$Source == "RefSeq")]
             if (length(refseq_inds) != 0) {
                 # Take the first first row of the refseq (smallest index)
                 refseq_rows[i] <- refseq_inds[1]
@@ -424,7 +424,7 @@ IPG2Lineage <- function(accessions, ipg_file,
     }
     if (length(genbank_rows) != 0) {
         genbank_ipg_dt <- ipg_dt[genbank_rows, ]
-        genbank_lins <- GCA2Lineage(.data$gca_ipg_dt,
+        genbank_lins <- GCA2Lineage(gca_ipg_dt,
             assembly_path = genbank_assembly_path,
             lineagelookup_path
         )