Skip to content

Commit

Permalink
Filename uses version not dates
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Feb 3, 2022
1 parent e486cfd commit 48b6817
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
4 changes: 3 additions & 1 deletion generate_transcript_data/ensembl_transcripts_grch37.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

set -e

CDOT_VERSION=0.2.1

# v81 (points to 75) and earlier are GTFs that don't have transcript versions - just skip them

# 82 is the first GFF3 release for GRCh37
Expand All @@ -22,7 +24,7 @@ for release in 82 85 87; do
merge_args+=(${cdot_file})
done

merged_file="cdot-$(date --iso).ensembl.grch37.json.gz"
merged_file="cdot-${CDOT_VERSION}.ensembl.grch37.json.gz"
if [[ ! -e ${merged_file} ]]; then
BASE_DIR=$(dirname ${BASH_SOURCE[0]})
cdot_json.py merge_historical ${merge_args[@]} --genome-build=GRCh37 --output "${merged_file}"
Expand Down
4 changes: 3 additions & 1 deletion generate_transcript_data/ensembl_transcripts_grch38.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

set -e

CDOT_VERSION=0.2.1

# Skip earlier GTFs as they don't have versions
#for release in 76 77 78 79 80; do
# filename=Homo_sapiens.GRCh38.${release}.gtf.gz
Expand Down Expand Up @@ -31,7 +33,7 @@ for release in 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
merge_args+=(${cdot_file})
done

merged_file="cdot-$(date --iso).ensembl.grch38.json.gz"
merged_file="cdot-${CDOT_VERSION}.ensembl.grch38.json.gz"
if [[ ! -e ${merged_file} ]]; then
BASE_DIR=$(dirname ${BASH_SOURCE[0]})
cdot_json.py merge_historical ${merge_args[@]} --genome-build=GRCh38 --output "${merged_file}"
Expand Down
3 changes: 2 additions & 1 deletion generate_transcript_data/refseq_transcripts_grch37.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

set -e

CDOT_VERSION=0.2.1
BASE_DIR=$(dirname ${BASH_SOURCE[0]})
GENOME_BUILD=grch37
UTA_VERSION=20210129
Expand Down Expand Up @@ -79,7 +80,7 @@ for release in 105.20190906 105.20201022; do
merge_args+=(${cdot_file})
done

merged_file="cdot-$(date --iso).refseq.grch37.json.gz"
merged_file="cdot-${CDOT_VERSION}.refseq.grch37.json.gz"
if [[ ! -e ${merged_file} ]]; then
echo "Creating ${merged_file}"
cdot_json.py merge_historical ${merge_args[@]} --genome-build=GRCh37 --output "${merged_file}"
Expand Down
9 changes: 5 additions & 4 deletions generate_transcript_data/refseq_transcripts_grch38.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

set -e

CDOT_VERSION=0.2.1
BASE_DIR=$(dirname ${BASH_SOURCE[0]})
GENOME_BUILD=grch38
UTA_VERSION=20210129
Expand Down Expand Up @@ -81,9 +82,9 @@ fi
merge_args+=(${cdot_file})


# These all have the same name, so rename them based on release ID
# 28 Jan 2022 - HTSeq GFF Parser currently dies on 109.20211119 - removed, waiting for it to get fixed
for release in 109.20190607 109.20190905 109.20191205 109.20200228 109.20200522 109.20200815 109.20201120 109.20210226 109.20210514; do
# 109.20211119 needs latest HTSeq (Feb 2022) or dies with quoting error
for release in 109.20190607 109.20190905 109.20191205 109.20200228 109.20200522 109.20200815 109.20201120 109.20210226 109.20210514 109.20211119; do
# These all have the same name, so rename them based on release ID
filename=GCF_000001405.39_GRCh38.p13_genomic.${release}.gff.gz
url=http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/annotation_releases/${release}/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.gff.gz
cdot_file=$(basename $filename .gz).json.gz
Expand All @@ -96,7 +97,7 @@ for release in 109.20190607 109.20190905 109.20191205 109.20200228 109.20200522
merge_args+=(${cdot_file})
done

merged_file="cdot-$(date --iso).refseq.grch38.json.gz"
merged_file="cdot-${CDOT_VERSION}.refseq.grch38.json.gz"
if [[ ! -e ${merged_file} ]]; then
echo "Creating ${merged_file}"
cdot_json.py merge_historical ${merge_args[@]} --genome-build=GRCh38 --output "${merged_file}"
Expand Down

0 comments on commit 48b6817

Please sign in to comment.