Skip to content

Commit

Permalink
#51 - historical GRCh38 - get right URL for alignments
Browse files (browse the repository at this point in the history)
  • Loading branch information
davmlaw committed Aug 10, 2023
1 parent 181fa70 commit d755146
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
9 changes: 8 additions & 1 deletion generate_transcript_data/gff_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -462,7 +462,14 @@ def handle_feature(self, feature):
self.transcript_proteins[transcript_accession] = genbank

if transcript_accession:
transcript = self.transcript_data_by_accession[transcript_accession]
transcript = self.transcript_data_by_accession.get(transcript_accession)
if not transcript:
msg = f"Couldn't find transcript data for accession '{transcript_accession}'"
if self.skip_missing_parents:
logging.warning(msg)
self.skipped_features_no_parents[feature.type] += 1
return
raise ValueError(msg)
self._handle_transcript_data(transcript_accession, transcript, feature)
else:
# There are so many different transcript ontology terms just taking everything that
Expand Down
2 changes: 1 addition & 1 deletion generate_transcript_data/refseq_transcripts_grch38.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,7 +36,7 @@ if [[ ! -e ${annotation_filename} ]]; then
wget ${url} --output-document=${annotation_filename}
fi

url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_knownrefseq_alns.gff.gz
alignments_filename=$(basename $url)
if [[ ! -e ${alignments_filename} ]]; then
wget ${url} --output-document=${alignments_filename}
Expand Down

0 comments on commit d755146

Please sign in to comment.