Skip to content

Commit

Permalink
tweaks comp-het bits
Browse files Browse the repository at this point in the history
  • Loading branch information
MattWellie committed Dec 6, 2023
1 parent ee4f960 commit 9648947
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 64 deletions.
7 changes: 5 additions & 2 deletions reanalysis/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,18 +266,21 @@ def get_sample_flags(self, sample: str) -> set[str]:
"""
return self.check_ab_ratio(sample) | self.check_read_depth(sample)

def check_read_depth(self, sample: str, threshold: int = 10) -> set[str]:
def check_read_depth(
self, sample: str, threshold: int = 10, var_is_cat_1: bool = False
) -> set[str]:
"""
flag low read depth for this sample
Args:
sample (str): sample ID matching VCF
threshold (int): cut-off for flagging as a failure
var_is_cat_1 (bool): flag if this variant is a category 1
Returns:
return a flag if this sample has low read depth
"""
if self.depths[sample] < threshold:
if self.depths[sample] < threshold and not var_is_cat_1:
return {'Low Read Depth'}
return set()

Expand Down
138 changes: 83 additions & 55 deletions reanalysis/moi_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@


def check_for_second_hit(
first_variant: str, comp_hets: CompHetDict, sample: str
first_variant: str,
comp_hets: CompHetDict,
sample: str,
require_non_support: bool = False,
) -> list[VARIANT_MODELS]:
"""
checks for a second hit partner in this gene
Expand All @@ -53,18 +56,27 @@ def check_for_second_hit(
} ...
}
:param first_variant: string representation of variant1
:param comp_hets: lookup dict for compound hets
:param sample: ID string
:return:
Args:
first_variant (str): string representation of variant1
comp_hets (dict[str, Variant]): lookup for compound hets
sample (str): sample ID
require_non_support (bool): if true, don't return Support only
Returns:
a list of variants which are potential partners
"""

# check if the sample has any comp-hets
if sample not in comp_hets.keys():
if sample not in comp_hets:
return []

sample_dict = comp_hets.get(sample)
return sample_dict.get(first_variant, [])
partners = comp_hets[sample].get(first_variant, [])
if require_non_support:
return [
partner for partner in partners if not partner.sample_support_only(sample)
]
else:
return partners


class MOIRunner:
Expand Down Expand Up @@ -189,9 +201,10 @@ def check_affected_category_depths(
self.pedigree[sample_id].affected == PEDDY_AFFECTED
and variant.sample_category_check(sample_id, allow_support=False)
)
) or (
variant.check_read_depth(sample_id, self.minimum_depth)
and not variant.info.get('categoryboolean1')
) or variant.check_read_depth(
sample_id,
self.minimum_depth,
var_is_cat_1=variant.info.get('categoryboolean1'),
):
return True
return False
Expand Down Expand Up @@ -412,8 +425,11 @@ def run(
self.pedigree[sample_id].affected == PEDDY_AFFECTED
and principal.sample_category_check(sample_id, allow_support=False)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
principal.check_read_depth(
sample_id,
self.minimum_depth,
var_is_cat_1=principal.info.get('categoryboolean1'),
)
):
continue

Expand Down Expand Up @@ -456,6 +472,11 @@ def __init__(
applied_moi: str = 'Autosomal Recessive Comp-Het',
):
""" """
self.hom_threshold = get_config()['moi_tests'][GNOMAD_REC_HOM_THRESHOLD]
self.freq_tests = {
SmallVariant.__name__: {key: self.hom_threshold for key in INFO_HOMS},
StructuralVariant.__name__: {key: self.hom_threshold for key in SV_HOMS},
}
super().__init__(pedigree=pedigree, applied_moi=applied_moi)

def run(
Expand All @@ -482,6 +503,15 @@ def run(

classifications = []

# remove if too many homs are present in population databases
if not (
self.check_frequency_passes(
principal.info, self.freq_tests[principal.__class__.__name__]
)
or principal.info.get('categoryboolean1')
):
return classifications

# if hets are present, try and find support
for sample_id in principal.het_samples:

Expand All @@ -493,33 +523,39 @@ def run(
and principal.sample_category_check(sample_id, allow_support=True)
)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
principal.check_read_depth(
sample_id,
self.minimum_depth,
principal.info.get('categoryboolean1'),
)
):
continue

for partner_variant in check_for_second_hit(
first_variant=principal.coordinates.string_format,
comp_hets=comp_het,
sample=sample_id,
require_non_support=principal.sample_support_only(sample_id),
):

# skip the double-support scenario
if (
principal.sample_support_only(sample_id)
and partner_variant.sample_support_only(sample_id)
) or (
partner_variant.check_read_depth(sample_id, self.minimum_depth)
and not partner_variant.info.get('categoryboolean1')
if partner_variant.check_read_depth(
sample_id,
self.minimum_depth,
partner_variant.info.get('categoryboolean1'),
) or not (
self.check_frequency_passes(
partner_variant.info,
self.freq_tests[partner_variant.__class__.__name__],
)
):
continue

# categorised for this specific sample, allow support in partner
# - also screen out high-AF partners
if not partner_variant.sample_category_check(
sample_id, allow_support=True
):
continue
# # categorised for this specific sample, allow support in partner
# # - also screen out high-AF partners
# if not partner_variant.sample_category_check(
# sample_id, allow_support=True
# ):
# continue

# check if this is a candidate for comp-het inheritance
if not self.check_comp_het(
Expand Down Expand Up @@ -606,9 +642,8 @@ def run(
self.pedigree[sample_id].affected == PEDDY_AFFECTED
and principal.sample_category_check(sample_id, allow_support=False)
)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
) or principal.check_read_depth(
sample_id, self.minimum_depth, principal.info.get('categoryboolean1')
):
continue

Expand Down Expand Up @@ -714,9 +749,8 @@ def run(
principal.sample_category_check(sample_id, allow_support=False)
and self.pedigree[sample_id].affected == PEDDY_AFFECTED
)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
) or principal.check_read_depth(
sample_id, self.minimum_depth, principal.info.get('categoryboolean1')
):
continue

Expand Down Expand Up @@ -815,9 +849,8 @@ def run(
self.pedigree[sample_id].affected == PEDDY_AFFECTED
and principal.sample_category_check(sample_id, allow_support=False)
)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
) or principal.check_read_depth(
sample_id, self.minimum_depth, principal.info.get('categoryboolean1')
):
continue

Expand Down Expand Up @@ -912,9 +945,8 @@ def run(
self.pedigree[sample_id].affected == PEDDY_AFFECTED
and principal.sample_category_check(sample_id, allow_support=False)
)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
) or principal.check_read_depth(
sample_id, self.minimum_depth, principal.info.get('categoryboolean1')
):
continue

Expand Down Expand Up @@ -1011,37 +1043,33 @@ def run(
self.pedigree[sample_id].affected == PEDDY_AFFECTED
and principal.sample_category_check(sample_id, allow_support=True)
)
) or (
principal.check_read_depth(sample_id, self.minimum_depth)
and not principal.info.get('categoryboolean1')
) or principal.check_read_depth(
sample_id, self.minimum_depth, principal.info.get('categoryboolean1')
):
continue

for partner in check_for_second_hit(
first_variant=principal.coordinates.string_format,
comp_hets=comp_het,
sample=sample_id,
require_non_support=principal.sample_support_only(sample_id),
):

# allow for de novo check - also screen out high-AF partners
if (
not partner.sample_category_check(sample_id, allow_support=True)
or not (
self.check_frequency_passes(
partner.info, self.freq_tests[partner.__class__.__name__]
)
or partner.info.get('categoryboolean1')
if not partner.sample_category_check(
sample_id, allow_support=True
) or not (
self.check_frequency_passes(
partner.info, self.freq_tests[partner.__class__.__name__]
)
) or (
principal.sample_support_only(sample_id)
and partner.sample_support_only(sample_id)
or partner.info.get('categoryboolean1')
):
continue

# check for minimum depth in partner
if partner.check_read_depth(
sample_id, self.minimum_depth
) and not partner.info.get('categoryboolean1'):
sample_id, self.minimum_depth, partner.info.get('categoryboolean1')
):
continue

if not self.check_comp_het(
Expand Down
13 changes: 6 additions & 7 deletions reanalysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,8 @@ def get_phase_data(samples, var) -> dict[str, dict[int, str]]:
# phase set is a number
if phase != PHASE_SET_DEFAULT:
phased_dict[sample][phase] = gt
except KeyError:
except KeyError as ke:
raise ke
get_logger().info('failed to find PS phase attributes')
try:
# retry using PGT & PID
Expand Down Expand Up @@ -461,6 +462,9 @@ def create_small_variant(
_boolcat = info.pop('categoryboolean2')
info['categorysample2'] = new_gene_samples

# organise PM5
info = organise_pm5(info)

# set the class attributes
boolean_categories = [
key for key in info.keys() if key.startswith('categoryboolean')
Expand Down Expand Up @@ -488,8 +492,6 @@ def create_small_variant(
elif isinstance(info[sam_cat], list):
info[sam_cat] = set(info[sam_cat])

# organise PM5
info = organise_pm5(info)
phased = get_phase_data(samples, var)
ab_ratios = dict(zip(samples, map(float, var.gt_alt_freqs)))
transcript_consequences = extract_csq(csq_contents=info.pop('csq', []))
Expand Down Expand Up @@ -542,15 +544,12 @@ def create_structural_variant(var: cyvcf2.Variant, samples: list[str]):
for cat in boolean_categories:
info[cat] = info.get(cat, 0) == 1

phased = get_phase_data(samples, var)

return StructuralVariant(
coordinates=coordinates,
info=info,
het_samples=het_samples,
hom_samples=hom_samples,
boolean_categories=boolean_categories,
phased=phased,
)


Expand Down Expand Up @@ -661,7 +660,7 @@ def gather_gene_dict_from_contig(
structural_variant
)

get_logger().info(f'Contig {contig} contained {structural_variants} variants')
get_logger().info(f'Contig {contig} contained {structural_variants} SVs')

get_logger().info(f'Contig {contig} contained {contig_variants} variants')
get_logger().info(f'Contig {contig} contained {len(contig_dict)} genes')
Expand Down

0 comments on commit 9648947

Please sign in to comment.