Skip to content

Commit

Permalink
#51 - historical - move to after UTA so only use if nothing else over…
Browse files Browse the repository at this point in the history
…writes
  • Loading branch information
davmlaw committed Aug 9, 2023
1 parent 4759b96 commit 2886dfc
Showing 1 changed file with 26 additions and 25 deletions.
51 changes: 26 additions & 25 deletions generate_transcript_data/refseq_transcripts_grch38.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,32 @@ if [[ ! -z ${UTA_TRANSCRIPTS} ]]; then
merge_args+=(${uta_cdot_file})
fi

# Historical - these are stored in separate files for annotation/alignments.
#
# Steps: download the annotation GFF, download the alignments GFF, concatenate
# them into one combined GFF, convert that to cdot JSON, and append the JSON
# to merge_args. Every step is skipped when its output file already exists.
#
# Downloads and the combined file are first written to "<name>.partial" and
# renamed only on success. wget's --output-document creates the target file
# even when the transfer fails, so writing directly to the final name would
# let a truncated file satisfy the existence check on the next run.
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
annotation_filename=$(basename "${url}")
if [[ ! -e "${annotation_filename}" ]]; then
  if wget "${url}" --output-document="${annotation_filename}.partial"; then
    mv -- "${annotation_filename}.partial" "${annotation_filename}"
  else
    rm -f -- "${annotation_filename}.partial"
    echo "ERROR: failed to download ${url}" >&2
    exit 1
  fi
fi

# NOTE(review): this URL is byte-identical to the annotation URL above, so
# alignments_filename equals annotation_filename, the download below never
# runs, and the "combined" file is the same GFF concatenated with itself.
# Presumably this should point at the separate alignments GFF in the same
# directory - confirm against the NCBI directory listing before changing.
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
alignments_filename=$(basename "${url}")
if [[ ! -e "${alignments_filename}" ]]; then
  if wget "${url}" --output-document="${alignments_filename}.partial"; then
    mv -- "${alignments_filename}.partial" "${alignments_filename}"
  else
    rm -f -- "${alignments_filename}.partial"
    echo "ERROR: failed to download ${url}" >&2
    exit 1
  fi
fi

filename=GCF_000001405.40-RS_2023_03_combined_annotation_alignments.gff.gz
cdot_file=cdot-${CDOT_VERSION}.$(basename "${filename}" .gz).json.gz

if [[ ! -e "${filename}" ]]; then
  echo "Combining historical annotations and alignments..."
  # The two .gz inputs are concatenated byte-for-byte, not re-compressed.
  if cat "${annotation_filename}" "${alignments_filename}" > "${filename}.partial"; then
    mv -- "${filename}.partial" "${filename}"
  else
    rm -f -- "${filename}.partial"
    echo "ERROR: failed to combine ${annotation_filename} and ${alignments_filename}" >&2
    exit 1
  fi
fi
if [[ ! -e "${cdot_file}" ]]; then
  # --url receives the most recently assigned ${url} (the alignments URL).
  "${BASE_DIR}/cdot_json.py" gff3_to_json "${filename}" --url "${url}" --genome-build=GRCh38 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" --skip-missing-parents
fi
merge_args+=("${cdot_file}")


filename=ref_GRCh38_top_level.gff3.gz
url=http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.106/GFF/${filename}
cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz
Expand Down Expand Up @@ -123,31 +149,6 @@ if [[ ! -e ${cdot_file} ]]; then
fi
merge_args+=(${cdot_file})

# Historical - these are stored in separate files for annotation/alignments.
#
# Steps: download the annotation GFF, download the alignments GFF, concatenate
# them into one combined GFF, convert that to cdot JSON, and append the JSON
# to merge_args. Every step is skipped when its output file already exists.
#
# Downloads and the combined file are first written to "<name>.partial" and
# renamed only on success. wget's --output-document creates the target file
# even when the transfer fails, so writing directly to the final name would
# let a truncated file satisfy the existence check on the next run.
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
annotation_filename=$(basename "${url}")
if [[ ! -e "${annotation_filename}" ]]; then
  if wget "${url}" --output-document="${annotation_filename}.partial"; then
    mv -- "${annotation_filename}.partial" "${annotation_filename}"
  else
    rm -f -- "${annotation_filename}.partial"
    echo "ERROR: failed to download ${url}" >&2
    exit 1
  fi
fi

# NOTE(review): this URL is byte-identical to the annotation URL above, so
# alignments_filename equals annotation_filename, the download below never
# runs, and the "combined" file is the same GFF concatenated with itself.
# Presumably this should point at the separate alignments GFF in the same
# directory - confirm against the NCBI directory listing before changing.
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
alignments_filename=$(basename "${url}")
if [[ ! -e "${alignments_filename}" ]]; then
  if wget "${url}" --output-document="${alignments_filename}.partial"; then
    mv -- "${alignments_filename}.partial" "${alignments_filename}"
  else
    rm -f -- "${alignments_filename}.partial"
    echo "ERROR: failed to download ${url}" >&2
    exit 1
  fi
fi

filename=GCF_000001405.40-RS_2023_03_combined_annotation_alignments.gff.gz
cdot_file=cdot-${CDOT_VERSION}.$(basename "${filename}" .gz).json.gz

if [[ ! -e "${filename}" ]]; then
  echo "Combining historical annotations and alignments..."
  # The two .gz inputs are concatenated byte-for-byte, not re-compressed.
  if cat "${annotation_filename}" "${alignments_filename}" > "${filename}.partial"; then
    mv -- "${filename}.partial" "${filename}"
  else
    rm -f -- "${filename}.partial"
    echo "ERROR: failed to combine ${annotation_filename} and ${alignments_filename}" >&2
    exit 1
  fi
fi
if [[ ! -e "${cdot_file}" ]]; then
  # --url receives the most recently assigned ${url} (the alignments URL).
  "${BASE_DIR}/cdot_json.py" gff3_to_json "${filename}" --url "${url}" --genome-build=GRCh38 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" --skip-missing-parents
fi
merge_args+=("${cdot_file}")

## Latest

filename=GCF_000001405.40_GRCh38.p14_genomic.RS_2023_03.gff.gz
Expand Down

0 comments on commit 2886dfc

Please sign in to comment.