-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #128 from mskcc/enhancement/hg38_update
Enhancement/hg38 update
- Loading branch information
Showing
61 changed files
with
4,306 additions
and
384 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
# __author__ = "Alexandria Dymun" | ||
# __email__ = "[email protected]" | ||
# __contributor__ = "Anne Marie Noronha ([email protected])" | ||
# __version__ = "0.0.1" | ||
# __version__ = "0.0.2" | ||
# __status__ = "Dev" | ||
|
||
|
||
|
@@ -12,11 +12,12 @@ suppressPackageStartupMessages({ | |
library(dplyr) | ||
library(data.table) | ||
library(stringr) | ||
options(scipen = 999) | ||
}) | ||
|
||
usage <- function() { | ||
message("Usage:") | ||
message("final_generate_v75_gene_bed.R <in.gff> <out.bed>") | ||
message("generate_gene_bed.R <in.gff> <out.bed>") | ||
} | ||
|
||
args = commandArgs(TRUE) | ||
|
@@ -26,15 +27,10 @@ if (length(args)!=2) { | |
quit() | ||
} | ||
|
||
# Utilized gtf from igenomes for FORTE This corresponds to GRCh37 ensembl 75 | ||
# Add introns to gtf, convert to gff3 | ||
# bsub -R "rusage[mem=64]" -o add_introns_agat_%J.out singularity exec -B /juno/ \\ | ||
# -B /tmp -B /scratch/ docker://quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0 \\ | ||
# /bin/bash -c "agat_sp_add_introns.pl -g /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf\\ | ||
# -o genes.INTRONS.gff3" | ||
|
||
gtf <- rtracklayer::import(args[1]) | ||
gtf_df <- as.data.frame(gtf) | ||
#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished) | ||
gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),] | ||
|
||
file.to_write <- args[2] | ||
|
||
|
@@ -44,7 +40,8 @@ gtf_df <- gtf_df %>% | |
chr = seqnames | ||
) %>% | ||
select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>% | ||
filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% mutate(start = start-1) | ||
filter(type %in% c("exon","intron","UTR","CDS","cds","utr","five_prime_utr","three_prime_utr")) %>% | ||
mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1) | ||
|
||
|
||
#START CLOCK | ||
|
@@ -106,6 +103,8 @@ modify_transcript <- function(transcript){ | |
transcript$type[transcript$start >= stop_coding & transcript$type == "UTR"] <- "utr5" | ||
} | ||
} | ||
transcript$type[transcript$type == "five_prime_utr"] <- "utr5" | ||
transcript$type[transcript$type == "three_prime_utr"] <- "utr3" | ||
#### Any exon that remains after the cds change is likely an untranslated region. Change below | ||
|
||
# Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,30 @@ | ||
FROM ubuntu:bionic-20230530 | ||
FROM ubuntu:jammy-20240911.1 | ||
|
||
LABEL maintainer="Anne Marie Noronha ([email protected])" \ | ||
version.image="0.0.6" | ||
version.image="0.0.7" | ||
|
||
# INSTALL DEPENDENCIES | ||
|
||
ENV DEBIAN_FRONTEND=noninteractive | ||
|
||
RUN apt-get update -y | ||
RUN apt-get install -y build-essential python3 python3-pip python3-matplotlib python3-pandas python3-future python3-biopython curl less vim libnss-sss git zip | ||
RUN apt-get install -y \ | ||
build-essential \ | ||
python3 \ | ||
python3-pip \ | ||
python3-matplotlib \ | ||
python3-pandas \ | ||
python3-future \ | ||
python3-biopython \ | ||
python3-dev \ | ||
default-libmysqlclient-dev \ | ||
pkg-config \ | ||
curl \ | ||
less \ | ||
vim \ | ||
libnss-sss \ | ||
git \ | ||
zip | ||
RUN pip3 install --upgrade pip | ||
RUN pip3 install pyensembl | ||
|
||
|
@@ -18,9 +34,8 @@ RUN pip3 install mysqlclient | |
|
||
# INSTALL AGFUSION & DATABASE FILES | ||
WORKDIR /usr/local/bin | ||
RUN git clone https://github.com/mskcc/AGFusion.git --branch v1.4.1-fork1 --single-branch | ||
RUN git clone https://github.com/mskcc/AGFusion.git --branch v1.4.[email protected] --single-branch | ||
WORKDIR /usr/local/bin/AGFusion | ||
RUN pip3 install -r requirements.txt | ||
RUN pip3 install . | ||
|
||
# pin gtfparse to 1.2.1 for compatibility (presumably with pyensembl) | ||
RUN pip3 install gtfparse==1.2.1 --upgrade |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- conda-forge::gawk=5.3.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
process FASTAREMOVEPREFIX { | ||
tag "$fasta" | ||
label 'process_single' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : | ||
'biocontainers/gawk:5.3.0' }" | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
input: | ||
tuple val(meta), path(fasta, name: 'input/*') | ||
|
||
output: | ||
tuple val(meta), path("*.{fa,fasta}"), emit: fasta | ||
path "versions.yml" , emit: versions | ||
|
||
script: | ||
def modified_fasta = fasta.fileName.name | ||
""" | ||
cat ${fasta} | sed "s/^>chr/>/g" | sed "s/^>M />MT /g" > ${modified_fasta} | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') | ||
END_VERSIONS | ||
""" | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.