Skip to content

Commit

Permalink
add goslim and analysis of OmicsBox output
Browse files Browse the repository at this point in the history
  • Loading branch information
jdieramon committed Mar 3, 2023
1 parent 577ddbe commit d436c1f
Show file tree
Hide file tree
Showing 5 changed files with 1,173 additions and 3 deletions.
Binary file added annotation/.DS_Store
Binary file not shown.
5 changes: 2 additions & 3 deletions GOslim.R → annotation/GOslim.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Read the ontologies 'bp' , 'mf', 'cc' and update it with new GO slims

# Load slim class object
load("data/slim.rda")
load("dat/slim.rda")


# New GO slims to add into 'bp' , 'mf', 'cc'
Expand All @@ -25,13 +25,12 @@ bp <- bp %>%
slim_class = c("protein metabolism", "response to abiotic or biotic stimulus",
"response to abiotic or biotic stimulus")))



mf <- mf %>%
bind_rows(c(GO_type = "MF",
slim_GO_ID = "GO:0140110",
slim_GO_Name = "transcription regulator activity",
slim_class = "other enzyme activity "))


# Save updated slim classification object
save(bp, mf, cc, file = "slim.rda")
170 changes: 170 additions & 0 deletions annotation/omicsBox_slim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Dependencies
library(dplyr)
library(stringr)
library(ggplot2)

# -----------------------------------------------------------------------------
# Analize the output file from OmicsBox
# -----------------------------------------------------------------------------

# OMICSBOX / EXPORT



# --- Analysis GENERIC EXPORT --------------------------------------------------

genexport <- read.delim("dat/generic_export")

# Sequence.Name Annotation.GO.ID Annotation.GO.Term Annotation.GO.Category
# 1 XP_004495769.1 GO:0000166 nucleotide binding Molecular Function
# 2 XP_004495769.1 GO:0005515 protein binding Molecular Function
# 3 XP_004495769.1 GO:0005634 nucleus Cellular Component


# Quick view
genexport %>% count(Annotation.GO.Category)

# Filter out sequences without GO terms
genexport <- genexport %>%
filter(Annotation.GO.ID %in% str_subset(Annotation.GO.ID, "")) %>%
as_tibble()


# Number of GO terms
genexport %>% count(Annotation.GO.Category)

# Sequences with 3, 2, or 1 GO terms
genexport %>%
count(Sequence.Name, Annotation.GO.Category) %>%
arrange(Sequence.Name) %>%
count(Sequence.Name) %>%
filter(n == 3)

genexport %>%
count(Sequence.Name, Annotation.GO.Category) %>%
arrange(Sequence.Name) %>%
count(Sequence.Name) %>%
filter(n == 2)

genexport %>%
count(Sequence.Name, Annotation.GO.Category) %>%
arrange(Sequence.Name) %>%
count(Sequence.Name) %>%
filter(n == 1)



# Number of BP terms per sequence
genexport %>%
filter(Annotation.GO.Category == "Biological Process") %>%
count(Annotation.GO.Category, Sequence.Name, sort = T) %>%
select(Sequence.Name, n)

# Number of MF terms per sequence
genexport %>%
filter(Annotation.GO.Category == "Molecular Function") %>%
count(Annotation.GO.Category, Sequence.Name, sort = T) %>%
select(Sequence.Name, n)

# Number of CC terms per sequence
genexport %>%
filter(Annotation.GO.Category == "Cellular Component") %>%
count(Annotation.GO.Category, Sequence.Name, sort = T) %>%
select(Sequence.Name, n)




# --- Analysis SLIM ----------------------------------------------------------

# Check if any BP, CC, MF term is not available in the 'slim.rda' object

# Load slim classification
load("dat/slim.rda")

# check available categories per Ontology
bp %>% count(slim_class, sort = T)
mf %>% count(slim_class, sort = T)
cc %>% count(slim_class, sort = T)

genexport_cc <- genexport %>%
rename("slim_GO_ID" = Annotation.GO.ID) %>%
inner_join(cc, by = "slim_GO_ID")

genexport_mf <- genexport %>%
rename("slim_GO_ID" = Annotation.GO.ID) %>%
inner_join(mf, by = "slim_GO_ID")

genexport_bp <- genexport %>%
rename("slim_GO_ID" = Annotation.GO.ID) %>%
inner_join(bp, by = "slim_GO_ID")


# Check if you need to update the `slim´ object.
add_slim <- function() {

slims <- genexport_bp %>%
bind_rows(genexport_mf, genexport_cc)

slims_uniq <- slims %>% pull(slim_GO_ID) %>% unique()

omics <- genexport %>% pull(Annotation.GO.ID) %>% unique()


if(length(slims_uniq) < length(omics)) {
cat(paste0("The OmicsBox annotation has ", length(omics),
" GO terms but the `slim` object has ", length(slims_uniq), " GO terms.
\nYou need to update the `slim` object."))

cat("\n")
omics[!omics %in% slims_uniq]


} else {print("No need to update the `slim´ object. ")}
}

add_slim()

# --- how to update the slim object :
# Example : "GO:0009507"
## goto : https://www.ebi.ac.uk/QuickGO/search/GO:0042221
# get the class : 'cc'
# get the name : chloroplast
# get the class (from the ancestor chart; hierarchy icon) : plastid / chloroplast
#
# cc <- cc %>% bind_rows(data.frame(GO_type = c("CC"),
# slim_GO_ID = c("GO:0009507"),
# slim_GO_Name = c("chloroplast"),
# slim_class = c("chloroplast")))
#
# bp <- bp %>% bind_rows(data.frame(GO_type = c("BP"),
# slim_GO_ID = c("GO:0065009"),
# slim_GO_Name = c("regulation of molecular function"),
# slim_class = c("other biological processes")))
#
# mf <- mf %>% bind_rows(data.frame(GO_type = c("MF"),
# slim_GO_ID = c("GO:0038023"),
# slim_GO_Name = c("signaling receptor activity"),
# slim_class = c("other molecular functions")))
#
# # Save updated slim classification object
# save(bp, mf, cc, file = "slim.rda")




#Annotationanalysis
genexport_mf %>%
filter(slim_class == "transcription factor activity") %>%
select(-c(GO_type, slim_class))

genexport_bp %>%
filter(slim_class %in% str_subset(slim_class, "response"))

genexport_bp %>%
count(slim_class, sort = T) %>%
ggplot(aes(reorder(slim_class, n), n)) +
geom_col(fill = "steelblue") +
coord_flip() + xlab("") + ylab("")


Loading

0 comments on commit d436c1f

Please sign in to comment.