Skip to content

Commit

Permalink
#1224 - VEP - structural variants too long to annotate
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Jan 29, 2025
1 parent 18a4047 commit 42e07ff
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.18 on 2025-01-28 23:32

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema-state migration: re-declares VariantAnnotation.vep_skipped_reason
    # with the choice list below (which includes 'l' / "Too Long").

    dependencies = [
        ("annotation", "0123_one_off_fix_annotation_sv_c_hgvs"),
    ]

    operations = [
        migrations.AlterField(
            model_name="variantannotation",
            name="vep_skipped_reason",
            field=models.CharField(
                blank=True,
                # Each entry is (stored single-character code, display label).
                choices=[
                    ("c", "Unknown Contig"),
                    ("i", "Incomplete"),
                    ("u", "Unknown"),
                    ("l", "Too Long"),
                ],
                max_length=1,
                null=True,
            ),
        ),
    ]
31 changes: 31 additions & 0 deletions annotation/migrations/0125_one_off_historical_vep_too_long.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Generated by Django 4.2.18 on 2025-01-29 00:48
import logging

from django.db import migrations
from django.db.models.functions import Abs


def _one_off_historical_vep_too_long(apps, schema_editor):
    """Relabel historical 'Unknown' VEP skips of over-long variants as 'Too Long'.

    Finds every ``VariantAnnotation`` whose ``vep_skipped_reason`` is 'u'
    (Unknown) and whose variant has ``abs(svlen)`` strictly greater than
    10,000,000, and sets its reason to 'l' (Too Long).

    Args:
        apps: Historical app registry supplied by ``RunPython``; used to look
            up the ``Variant`` and ``VariantAnnotation`` models.
        schema_editor: Supplied by ``RunPython``; not used here.
    """
    Variant = apps.get_model("snpdb", "Variant")
    VariantAnnotation = apps.get_model("annotation", "VariantAnnotation")

    # Historical models do not carry the project's enum classes, so the stored
    # single-character codes are spelled out literally.
    VEP_SKIPPED_UNKNOWN = 'u'
    VEP_SKIPPED_TOO_LONG = 'l'

    # Hard-coded rather than read from settings: this back-fills rows written
    # before the size limit became configurable.
    # NOTE(review): assumed to be the VEP default --max_sv_size - confirm.
    HISTORICAL_VEP_MAX_SV_SIZE = 10_000_000

    # Strictly greater-than, matching how the import code classifies TOO_LONG
    # (length > max size); a variant exactly at the limit is not "too long".
    long_variants_qs = (
        Variant.objects
        .filter(svlen__isnull=False)
        .annotate(abs_svlen=Abs("svlen"))
        .filter(abs_svlen__gt=HISTORICAL_VEP_MAX_SV_SIZE)
    )
    unknown_long_qs = VariantAnnotation.objects.filter(
        variant__in=long_variants_qs,
        vep_skipped_reason=VEP_SKIPPED_UNKNOWN,
    )
    # update() returns the number of rows matched; only log when non-zero.
    if num_updated := unknown_long_qs.update(vep_skipped_reason=VEP_SKIPPED_TOO_LONG):
        logging.info("Updated %d VEP skipped variants to TOO_LONG ", num_updated)


class Migration(migrations.Migration):
    # One-off data migration: runs _one_off_historical_vep_too_long once the
    # 'l' (Too Long) choice from migration 0124 is in place.

    dependencies = [
        ('annotation', '0124_alter_variantannotation_vep_skipped_reason'),
    ]

    operations = [
        migrations.RunPython(
            _one_off_historical_vep_too_long,
            # Without reverse_code the migration is irreversible and blocks
            # rolling the app back past this point. Reversing is a no-op:
            # relabelled rows are intentionally NOT restored to 'u'.
            reverse_code=migrations.RunPython.noop,
        ),
    ]
1 change: 1 addition & 0 deletions annotation/models/models_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ class VEPSkippedReason(models.TextChoices):
UNKNOWN_CONTIG = 'c', "Unknown Contig"
INCOMPLETE = 'i', "Incomplete"
UNKNOWN = 'u', "Unknown"
TOO_LONG = 'l', "Too Long"


class ClinVarReviewStatus(models.TextChoices):
Expand Down
3 changes: 3 additions & 0 deletions annotation/vcf_files/import_vcf_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def handle_vep_warnings(annotation_run: AnnotationRun, bulk_inserter):

version = annotation_run.annotation_range_lock.version
annotation_version = version.get_any_annotation_version()
# This pulls down any un-annotated variants (which after running VEP + inserting means were skipped)
for v in get_variants_qs_for_annotation(annotation_version,
pipeline_type=annotation_run.pipeline_type,
min_variant_id=annotation_run.annotation_range_lock.min_variant_id,
Expand All @@ -92,6 +93,8 @@ def handle_vep_warnings(annotation_run: AnnotationRun, bulk_inserter):
reason = VEPSkippedReason.UNKNOWN_CONTIG
elif v.pk in incomplete_variant_ids:
reason = VEPSkippedReason.INCOMPLETE
elif v.length > settings.ANNOTATION_VEP_SV_MAX_SIZE:
reason = VEPSkippedReason.TOO_LONG
else:
reason = VEPSkippedReason.UNKNOWN

Expand Down
5 changes: 5 additions & 0 deletions annotation/vep_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,11 @@ def get_vep_command(vcf_filename, output_filename, genome_build: GenomeBuild, an
if settings.ANNOTATION_VEP_DISTANCE is not None:
cmd.extend(["--distance", str(settings.ANNOTATION_VEP_DISTANCE)])

if max_sv_size := settings.ANNOTATION_VEP_SV_MAX_SIZE:
vep_default_max_sv_size = 10_000_000
if max_sv_size != vep_default_max_sv_size:
cmd.extend(["--max_sv_size", str(max_sv_size)])

if annotation_consortium == AnnotationConsortium.REFSEQ:
cmd.append("--refseq")

Expand Down
1 change: 1 addition & 0 deletions variantgrid/settings/components/annotation_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
ANNOTATION_VEP_SV_OVERLAP_SAME_TYPE = True # Only 'dup' for dups, false is all SVs overlap
ANNOTATION_VEP_SV_OVERLAP_SINGLE_VALUE_METHOD = "lowest_af" # "greatest_overlap", "lowest_af", "exact_or_lowest_af"
ANNOTATION_VEP_SV_OVERLAP_MIN_FRACTION = 0.8
ANNOTATION_VEP_SV_MAX_SIZE = 10_000_000 # VEP default = 10M

ANNOTATION_MAX_BENIGN_RANKSCORE = 0.15
ANNOTATION_MIN_PATHOGENIC_RANKSCORE = 0.85
Expand Down

0 comments on commit 42e07ff

Please sign in to comment.