Skip to content

Commit

Permalink
Merge pull request #115 from Klangina/Issue-114
Browse files Browse the repository at this point in the history
resolves #114
  • Loading branch information
the-mayer authored Oct 30, 2024
2 parents c838c4e + 4667118 commit b758992
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 55 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ importFrom(readr,write_file)
importFrom(readr,write_lines)
importFrom(readr,write_tsv)
importFrom(rentrez,entrez_fetch)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,abort)
importFrom(rlang,as_string)
Expand Down Expand Up @@ -274,6 +275,7 @@ importFrom(tidyr,pivot_wider)
importFrom(tidyr,replace_na)
importFrom(tidyr,separate)
importFrom(tidyr,unite)
importFrom(utils,combn)
importFrom(viridis,scale_fill_viridis)
importFrom(visNetwork,visEdges)
importFrom(visNetwork,visGroups)
Expand Down
13 changes: 7 additions & 6 deletions R/assign_job_queue.R
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,8 @@ assignJobQueue <- function(
#' @importFrom dplyr mutate select
#' @importFrom ggplot2 aes geom_line ggplot labs
#' @importFrom tibble as_tibble
#' @importFrom rlang warn abort inform
#' @importFrom utils combn
#' @importFrom rlang .data warn abort inform
#'
#' @return line plot object
#'
Expand Down Expand Up @@ -657,13 +658,13 @@ plotEstimatedWallTimes <- function() {
df_walltimes <- tidyr::gather(df_walltimes,
key = "advanced_opts",
value = "est_walltime",
n_inputs)
.data$n_inputs)
# sec to hrs
df_walltimes <- df_walltimes |>
dplyr::mutate(est_walltime = est_walltime / 3600)
p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs,
y = est_walltime,
color = advanced_opts)) +
dplyr::mutate(est_walltime = .data$est_walltime / 3600)
p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = .data$n_inputs,
y = .data$est_walltime,
color = .data$advanced_opts)) +
ggplot2::geom_line() +
ggplot2::labs(
title = "MolEvolvR estimated runtimes",
Expand Down
82 changes: 35 additions & 47 deletions R/cleanup.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,9 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
#' @return A data frame with condensed repeated domains in the specified column.
#' @export
#'
#' @importFrom dplyr pull
#' @importFrom dplyr pull mutate
#' @importFrom stringr str_replace_all
#' @importFrom rlang .data :=
#'
#' @examples
#' \dontrun{
Expand All @@ -206,36 +207,23 @@ condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots
regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([a-z0-9_-]+)\\b(?:\\s+\\1\\b)+")

# TODO: FUNS (presumably dplyr's `funs()`) is soft-deprecated; replace any remaining use.
prot[, by_column] <- prot %>%
pull(by_column) %>%
str_replace_all(., pattern = "\\.", replacement = "_d_") %>%
# str_replace_all(., pattern = " ", replacement = "_s_") %>%
str_replace_all(., pattern = " ", replacement = "_") %>%
str_replace_all(.,
pattern = "\\+",
replacement = " "
) %>% # Use a different placeholder other than space
str_replace_all(.,
pattern = "-",
replacement = "__"
) %>%
str_replace_all(.,
pattern = regex_identify_repeats,
replacement = "\\1(s)"
) %>%
str_replace_all(.,
pattern = "__",
replacement = "-"
) %>%
str_replace_all(.,
pattern = " ",
replacement = "+"
) %>%
# str_replace_all(., pattern = "_s_", replacement = " ") %>%
str_replace_all(., pattern = "_d_", replacement = ".")

prot <- prot %>%
dplyr::mutate(!!by_column := stringr::str_replace_all(
.data[[by_column]],
c(
"\\." = "_d_",
" " = "_",
"\\+" = " ",
"-" = "__",
regex_identify_repeats = "\\1(s)",
"__" = "-",
" " = "+",
"_d_" = "."
)
))

return(prot)

}


Expand Down Expand Up @@ -701,8 +689,8 @@ cleanGeneDescription <- function(prot, column) {
#' @param column The name of the column from which the longest entry among
#' duplicates will be selected.
#'
#' @importFrom dplyr arrange filter group_by pull n select summarize
#' @importFrom rlang sym
#' @importFrom dplyr arrange filter group_by pull n select summarize mutate
#' @importFrom rlang sym .data
#'
#' @return A data frame containing only the longest entries among duplicates
#' based on the specified column.
Expand All @@ -713,37 +701,37 @@ cleanGeneDescription <- function(prot, column) {
#' selectLongestDuplicate()
#' }
selectLongestDuplicate <- function(prot, column) {
col <- sym(column)

prot$row.orig <- 1:nrow(prot)

col <- rlang::sym(column)
prot <- prot %>%
mutate(row.orig = seq_len(n()))
# Get list of duplicates
dups <- prot %>%
group_by(AccNum) %>%
summarize("count" = n()) %>%
group_by(.data$AccNum) %>%
summarize(count = n()) %>%
filter(count > 1) %>%
arrange(-count) %>%
merge(prot, by = "AccNum")
arrange(desc(count)) %>%
left_join(prot, by = "AccNum")

dup_acc <- dups$AccNum
dup_acc <- unique(dups$AccNum)

longest_rows <- c()
remove_rows <- c()
longest_rows <- integer()
remove_rows <- integer()
for (acc in dup_acc) {
dup_rows <- dups %>% filter(AccNum == acc)
dup_rows <- dups %>% filter(.data$AccNum == acc)

longest <- dup_rows[which(nchar(pull(dup_rows, {{ col }})) == max(nchar(pull(dup_rows, {{ col }}))))[1], "row.orig"]
longest <- dup_rows$row.orig[which.max(nchar(pull(dup_rows, !!col)))]

longest_rows <- c(longest_rows, longest)

to_remove <- dup_rows[which(dup_rows$row.orig != longest), "row.orig"][]
to_remove <- dup_rows$row.orig[dup_rows$row.orig != longest]

# dup_rows[which(nchar(pull(dup_rows,{{col}})) == max(nchar(pull(dup_rows,{{col}}))))[2:nrow(dup_rows)], "row.orig"]
remove_rows <- c(remove_rows, to_remove)
}

# grab all the longest rows
unique_dups <- prot[-remove_rows, ] %>% select(-row.orig)
unique_dups <- prot %>%
filter(!.data$row.orig %in% remove_rows) %>%
select(-.data$row.orig)

return(unique_dups)
}
Expand Down
8 changes: 7 additions & 1 deletion R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size
#' @param coord_flip Logical. Whether to flip the coordinates of the plot
#' (default is TRUE).
#' @param legend Logical. Whether to display the legend (default is TRUE).
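#' @param cpcols Character vector of colors (hex codes) for the plot palette.
#' If NULL (the default), a built-in palette of eight colors is used.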
#'
#' @importFrom dplyr pull select
#' @importFrom ggplot2 aes_string coord_flip element_blank element_line element_rect element_text geom_bar ggplot guides guide_legend scale_fill_manual xlab ylab theme theme_minimal
Expand All @@ -903,8 +904,13 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "
legend.text.size = 10,
legend.cols = 2,
legend.size = 0.7,
coord_flip = TRUE, legend = TRUE) {
coord_flip = TRUE, legend = TRUE,
cpcols = NULL) {
col <- sym(column)

if (is.null(cpcols)) {
cpcols <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF")
}

if (reduce_lineage) {
prot <- shortenLineage(prot, Lineage_col, abr_len = 3)
Expand Down
5 changes: 4 additions & 1 deletion man/plotStackedLineage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b758992

Please sign in to comment.