Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Refactored function names in R/summarize.R #79

Merged
merged 5 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export(convertAlignment2FA)
export(convertAlignment2Trees)
export(convertFA2Tree)
export(convert_aln2fa)
export(count_bycol)
export(countByColumn)
export(createFA2Tree)
export(createWordCloud2Element)
export(createWordCloudElement)
Expand All @@ -41,9 +41,9 @@ export(domain_network)
export(downloadAssemblySummary)
export(efetchIPG)
export(extractAccNum)
export(filter_by_doms)
export(filter_freq)
export(find_paralogs)
export(filterByDomains)
export(filterByFrequency)
export(findParalogs)
export(find_top_acc)
export(format_job_args)
export(gc_undirected_network)
Expand Down Expand Up @@ -91,15 +91,15 @@ export(selectLongestDuplicate)
export(send_job_status_email)
export(shortenLineage)
export(sinkReset)
export(summ.DA)
export(summ.DA.byLin)
export(summ.GC)
export(summ.GC.byDALin)
export(summ.GC.byLin)
export(summarize_bylin)
export(summarizeByLineage)
export(summarizeDomArch)
export(summarizeDomArch_ByLineage)
export(summarizeGenContext)
export(summarizeGenContext_ByDomArchLineage)
export(summarizeGenContext_ByLineage)
export(theme_genes2)
export(to_titlecase)
export(total_counts)
export(totalGenContextOrDomArchCounts)
export(validateCountDF)
export(wordcloud3)
export(write.MsaAAMultipleAlignment)
Expand Down
2 changes: 1 addition & 1 deletion R/cleanup.R
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch",
# Remove rows with no domains contained within domains_keep
# filter(grepl(domains_for_grep, DomArch))
if (!is.null(domains_keep)) {
prot <- prot %>% filter_by_doms(column = new, doms_keep = domains_keep$domains)
prot <- prot %>% filterByDomains(column = new, doms_keep = domains_keep$domains)
}

# ##!! NOT RUN !!
Expand Down
8 changes: 4 additions & 4 deletions R/networks_domarch.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff
{
column_name <- sym(column)

prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)
prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)

# ensure only Domains that are in the tc cutoff range are kept
within_list <- prot_tc %>%
Expand Down Expand Up @@ -95,7 +95,7 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff
# cleanup domain list
domain.list <- domain.list$DomArch.ntwrk %>% str_split(pattern = "\\+")
# Get a table of domain counts
wc <- elements2words(prot = prot, column = column, conversion_type = "da2doms") %>% words2wc()
wc <- elements2Words(prot = prot, column = column, conversion_type = "da2doms") %>% words2WordCounts()
wc <- pivot_wider(wc, names_from = words, values_from = freq)

# Remove all isolated domarchs, such that an adjacency list can easily be constructed
Expand Down Expand Up @@ -262,7 +262,7 @@ BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c

column_name <- sym(column)

prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)
prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff, RowsCutoff = F, digits = 5)

within_list <- prot_tc %>%
select({{ column_name }}) %>%
Expand All @@ -286,7 +286,7 @@ BinaryDomainNetwork <- function(prot, column = "DomArch", domains_of_interest, c
domain.list <- domain.list$DomArch.ntwrk %>% str_split(pattern = "\\+")

# Get domain counts before eliminating domarchs with no edges
wc <- elements2words(prot = prot, column = column, conversion_type = "da2doms") %>% words2wc()
wc <- elements2Words(prot = prot, column = column, conversion_type = "da2doms") %>% words2WordCounts()

nodes <- data.frame(id = wc$words, label = wc$words, size = wc$freq) %>%
mutate(group = purrr::map(
Expand Down
12 changes: 6 additions & 6 deletions R/networks_gencontext.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ gc_undirected_network <- function(prot, column = "GenContext", domains_of_intere
column_name <- sym(column)
if (cutoff_type == "Lineage") {
lin_summary <- prot %>%
summ.DA.byLin() %>%
summ.DA()
summarizeDomArch_ByLineage() %>%
summarizeDomArch()
doms_above_cutoff <- (lin_summary %>% filter(totallin >= cutoff))[[column]]
} else if (cutoff_type == "Total Count") { # Change this type?
GC_above_cutoff <- (prot %>% total_counts(column = column, cutoff = cutoff))[[column]]
GC_above_cutoff <- (prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff))[[column]]
}

prot <- prot[which(prot[[as_string(column_name)]] %in% GC_above_cutoff), ]
Expand Down Expand Up @@ -153,8 +153,8 @@ GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext",
column_name <- sym(column)


# Perform cutoff through total_counts
prot_tc <- prot %>% total_counts(column = column, cutoff = cutoff)
# Perform cutoff through totalGenContextOrDomArchCounts
prot_tc <- prot %>% totalGenContextOrDomArchCounts(column = column, cutoff = cutoff)

within_list <- prot_tc %>%
select({{ column_name }}) %>%
Expand Down Expand Up @@ -218,7 +218,7 @@ GenContextNetwork <- function(prot, domains_of_interest, column = "GenContext",
}

# Get domain counts before eliminating domarchs with no edges
wc <- elements2words(prot = prot, column = column, conversion_type = "gc2da") %>% words2wc()
wc <- elements2Words(prot = prot, column = column, conversion_type = "gc2da") %>% words2WordCounts()
nodes <- data.frame(id = wc$words, label = wc$words, size = wc$freq)

max_size <- max(nodes$size)
Expand Down
22 changes: 11 additions & 11 deletions R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ plotUpSet <- function(query_data = "toast_rack.sub",
# colname = string(colname)
tryCatch(
{
tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = RowsCutoff, digits = 5)
tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = RowsCutoff, digits = 5)
##### Remove Tails ####
# tails each comprise less than 1% of the data
# i.e., each individual percentage is less than 1
Expand All @@ -128,7 +128,7 @@ plotUpSet <- function(query_data = "toast_rack.sub",
words.tc <- tc %>%
select({{ column }}) %>%
distinct() %>%
elements2words(column = colname, conversion_type = type)
elements2Words(column = colname, conversion_type = type)
# names(words.tc)[1] <- "words"
words.tc <- words.tc %>% str_split(pattern = " ")
words.tc <- as.data.frame(words.tc, col.names = "Words", stringsAsFactors = F) %>%
Expand Down Expand Up @@ -273,7 +273,7 @@ plotLineageDA <- function(query_data = "prot",

query_data <- shortenLineage(query_data, "Lineage", abr_len = 1)

query.summ.byLin <- query_data %>% total_counts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff)
query.summ.byLin <- query_data %>% totalGenContextOrDomArchCounts(cutoff = cutoff, column = colname, RowsCutoff = RowsCutoff)

query.summ.byLin$Lineage <- map(query.summ.byLin$Lineage, function(x) str_replace_all(string = x, pattern = ">", replacement = "_")) %>%
unlist()
Expand Down Expand Up @@ -390,7 +390,7 @@ plotLineageQuery <- function(query_data = all,
}
col <- sym(colname)

query_data <- query_data %>% total_counts(column = colname, cutoff = cutoff)
query_data <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff)
# query_data contains all rows that possess a lineage
query_data <- query_data %>% filter(grepl("a", Lineage))

Expand Down Expand Up @@ -831,7 +831,7 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "
prot <- shortenLineage(prot, Lineage_col, abr_len = 3)
}

total_count <- total_counts(prot, column, cutoff, lineage_col = Lineage_col)
total_count <- totalGenContextOrDomArchCounts(prot, column, cutoff, lineage_col = Lineage_col)
# total_count = prot

# Order bars by descending freq
Expand Down Expand Up @@ -1054,7 +1054,7 @@ createWordCloudElement <- function(query_data = "prot",
colname = "DomArch",
cutoff = 70,
UsingRowsCutoff = FALSE) {
tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)
tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)

column <- sym(colname)
# Get words from filter
Expand All @@ -1069,11 +1069,11 @@ createWordCloudElement <- function(query_data = "prot",
}

words.tc <- query_data %>%
elements2words(
elements2Words(
column = colname,
conversion_type = type
) %>%
words2wc()
words2WordCounts()

# names(words.tc) <- c("words", "freq")

Expand Down Expand Up @@ -1134,7 +1134,7 @@ createWordCloud2Element <- function(query_data = "prot",
# @param type Character. Default is "da2doms" for Domain Architectures.
# Other alternative: "gc2da" for Genomic Contexts.

tc <- query_data %>% total_counts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)
tc <- query_data %>% totalGenContextOrDomArchCounts(column = colname, cutoff = cutoff, RowsCutoff = UsingRowsCutoff, digits = 5)

column <- sym(colname)
query_data <- query_data %>% filter({{ column }} %in% pull(tc, {{ colname }}))
Expand All @@ -1146,11 +1146,11 @@ createWordCloud2Element <- function(query_data = "prot",
}

words.tc <- query_data %>%
elements2words(
elements2Words(
column = colname,
conversion_type = type
) %>%
words2wc()
words2WordCounts()

names(words.tc) <- c("words", "freq")

Expand Down
Loading
Loading