-
Notifications
You must be signed in to change notification settings - Fork 5
Biocommons T2T CHM13v2.0 example code
Dave Lawrence edited this page Apr 3, 2023
·
2 revisions
# Get FASTA
wget --quiet -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.fna.gz | gzip -d | bgzip > GCF_009914755.1_T2T-CHM13v2.0_genomic.fna.gz
samtools faidx GCF_009914755.1_T2T-CHM13v2.0_genomic.fna.gz
# Get cdot data
wget https://github.com/SACGF/cdot/releases/download/v0.2.14/cdot-0.2.15.GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.json.gz
import hgvs
from hgvs.assemblymapper import AssemblyMapper
from cdot.hgvs.dataproviders import JSONDataProvider, RESTDataProvider
from cdot.hgvs.dataproviders.fasta_seqfetcher import FastaSeqFetcher
seqfetcher = FastaSeqFetcher("./GCF_009914755.1_T2T-CHM13v2.0_genomic.fna.gz")
hdp = JSONDataProvider(["./cdot-0.2.15.GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.json.gz"],
seqfetcher=seqfetcher)
am = AssemblyMapper(hdp,
assembly_name='CHM13v2.0',
alt_aln_method='splign', replace_reference=True)
hp = hgvs.parser.Parser()
var_c = hp.parse_hgvs_variant('NM_001637.4:c.1582G>A')
print(am.c_to_g(var_c))
Output:
NC_060931.1:g.36662409C>T