Skip to content

Commit

Permalink
update paths and address suggestions
Browse files Browse the repository at this point in the history
Signed-off-by: Awa Synthia <[email protected]>
  • Loading branch information
awasyn committed Nov 20, 2024
1 parent 9ce780b commit 397c02c
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 49 deletions.
2 changes: 1 addition & 1 deletion R/ipr2viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ plotIPR2Viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
analysis_labeler <- analyses %>%
pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)

lookup_tbl_path <- "~/awasyn/new_trial/cln_lookup_tbl.tsv"
system.file("common_data", "cln_lookup_tbl.tsv", package = "MolEvolvR", mustWork = TRUE)
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)

lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc
Expand Down
31 changes: 0 additions & 31 deletions inst/report/scripts/MolEvolData_class.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,37 +41,6 @@ setClass("seqUpload",
)
)

# Group by lineage + DA then take top 20
top_acc <- function(cln_file, DA_col = "DomArch.Pfam",
lin_col = "Lineage", n = 20) {
lin_sym <- sym(lin_col)
DA_sym <- sym(DA_col)

cln <- fread(cln_file, sep = "\t", fill = T)

grouped <- cln %>%
group_by({{ lin_sym }}, {{ DA_sym }}) %>%
summarise(count = n()) %>%
arrange(-count) %>%
filter(!is.na({{ lin_sym }}) & !is.na({{ DA_sym }}))

top_acc <- character(n)
for (r in 1:min(nrow(grouped), n))
{
l <- (grouped %>% pull({{ lin_sym }}))[r]
DA <- (grouped %>% pull({{ DA_sym }}))[r]

filt <- cln %>% filter({{ lin_sym }} == l & {{ DA_sym }} == DA)

top <- filt[which(filt$PcPositive == max(filt$PcPositive))[1], ]

top_acc[r] <- top$AccNum
}
top_acc <- top_acc[which(top_acc != "")]
return(top_acc)
}


combineFilesNopmap <- function(inpath, pattern, outpath,
delim = "\t", skip = 0,
col_names) {
Expand Down
6 changes: 3 additions & 3 deletions inst/report/scripts/generate_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ runAnalysis <- function(

# After uploading the sequence data, you would check the uploaded data
if (sequence_upload_data@seqs == "") {
stop("Error: Please upload a protein sequence")
stop("Error: Please upload a protein sequence.")
}
OUT_PATH <- getwd()
unavailable_pins <- list.files(OUT_PATH)
Expand Down Expand Up @@ -339,7 +339,7 @@ runAnalysis <- function(
validateFasta(tmp_file)
},
error = function(e) {
warning("Error: Failed to run input FASTA verification")
warning("Error: Failed to run input FASTA verification.")
return(FALSE) # Return FALSE if an error occurs
},
finally = {
Expand Down Expand Up @@ -526,7 +526,7 @@ runAnalysis <- function(
}
if (ipr_upload_data()@seqs == "" && !ipr_ncbi_check) {
stop("Error: Please provide a file containing sequences or check the
box to use fetch sequences for NCBI accession numbers.")
box to fetch sequences using NCBI accession numbers.")
}

ipr <- read_tsv(ipr_upload_data()@df, col_names = FALSE)
Expand Down
30 changes: 16 additions & 14 deletions inst/report/scripts/run_molevolvr_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ runMolevolvrPipeline <- function(input_paths, db, nhits, eval,
# setwd(OUTDIR)

# Run DELTABLAST
runDeltablast(input_paths, PREFIX, OUTDIR, db, nhits, eval)
runDELTABLAST(input_paths, PREFIX, OUTDIR, db, nhits, eval)

# Run ACC2FA
convertAccNum2Fasta(file.path(OUTDIR, paste0(PREFIX, ".dblast.tsv")),
Expand All @@ -207,7 +207,7 @@ runMolevolvrPipeline <- function(input_paths, db, nhits, eval,
# Sys.sleep(30)

# Run BLASTCLUST
runBlastclust(file.path(OUTDIR, paste0(PREFIX, ".all_accnums.fa")),
runCDHIT(file.path(OUTDIR, paste0(PREFIX, ".all_accnums.fa")),
PREFIX, OUTDIR )

# Convert clusters to table
Expand Down Expand Up @@ -541,7 +541,7 @@ replaceAccNums <- function(path_acc2info,
}


runDeltablast <- function(infile, prefix, outdir,
runDELTABLAST <- function(infile, prefix, outdir,
db = "refseq_protein",
nhits = 5000, evalue = 1e-5,
threads = 10) {
Expand Down Expand Up @@ -700,9 +700,9 @@ cleanupBlast <- function(infile_blast, acc2info, prefix, wblast = F) {

# TaxID to lineage mapping
cleanedup_blast$TaxID <- as.integer(cleanedup_blast$TaxID)
lineage_map <- fread("~/awasyn/new_trial/lineage_lookup.txt",
header = TRUE, fill = TRUE,
colClasses = lineage_map_cols)
lineage_map <- fread(
system.file("common_data", "lineage_lookup.txt", package = "MolEvolvR", mustWork = TRUE),
header = TRUE, fill = TRUE, colClasses = lineage_map_cols)

# Merge with lineage map and clean up columns
mergedLins <- merge(cleanedup_blast, lineage_map, by = "TaxID",
Expand All @@ -722,7 +722,7 @@ cleanupBlast <- function(infile_blast, acc2info, prefix, wblast = F) {


# Function to run BLASTCLUST on given input
runBlastclust <- function(infile, suffix, outdir) {
runCDHIT <- function(infile, suffix, outdir) {

# Prepare output file path
outfile <- file.path(outdir, paste0(suffix, ".bclust.L60S80.tsv"))
Expand All @@ -734,13 +734,13 @@ runBlastclust <- function(infile, suffix, outdir) {
cat("## Now running BLASTCLUST on file(s):", infile, "\n")
cat("#####################################\n")

# Run BLASTCLUST
# Cluster sequences w/ BLASTCLUST/CD-HIT
# blastclust_cmd <- paste("blastclust -i", infile, "-o", outfile, "-p T -L .6 -b T -S 80 -a 8")
cdhit_command <- sprintf(
"cd-hit -i %s -o %s -c 0.8 -aS 0.6 -T 8",
infile, outfile
)
clean_command <- sprintf(
clean_cdhit_format <- sprintf(
"awk '/^>Cluster/ {if(NR>1)printf \"\\n\"; next} /WP_/ {start=index($0, \"WP_\"); if(start) {end=index(substr($0, start), \"...\"); if (end == 0) end=length($0); printf \"%%s \", substr($0, start, end-1)}}' %s > %s",
input_file,
outfile
Expand All @@ -750,7 +750,7 @@ runBlastclust <- function(infile, suffix, outdir) {
# system(blastclust_cmd)

system(cdhit_command)
system(clean_command)
system(clean_cdhit_format)

}

Expand Down Expand Up @@ -881,8 +881,9 @@ ipr2Linear <- function(ipr, acc2info, prefix) {
ipr_tax <- left_join(ipr_in, acc2info_out, by = "AccNum")

# read in lineage map
lineage_map <- fread("~/awasyn/new_trial/lineage_lookup.txt",
header = T, fill = T)
lineage_map <- fread(
system.file("common_data", "lineage_lookup.txt", package = "MolEvolvR", mustWork = TRUE),
header = TRUE, fill = TRUE)

# merge ipr+info w/ lineage
# both tables have a species column, but only
Expand All @@ -895,8 +896,9 @@ ipr2Linear <- function(ipr, acc2info, prefix) {
select(-Species.x, -Species.y)

# add lookup table to iprscan file
lookup_tbl <- fread(input = "~/awasyn/new_trial/cln_lookup_tbl.tsv",
sep = "\t", header = T, fill = T) %>%
lookup_tbl <- fread(
system.file("common_data", "lineage_lookup.txt", package = "MolEvolvR", mustWork = TRUE),
sep = "\t", header = TRUE, fill = TRUE) %>%
distinct()
if ("AccNum.x" %in% names(ipr_lin)) {
ipr_lin <- ipr_lin %>%
Expand Down

0 comments on commit 397c02c

Please sign in to comment.