Skip to content

Commit

Permalink
add tests for schema to pydantic matching
Browse files Browse the repository at this point in the history
  • Loading branch information
ahwagner committed Feb 10, 2024
1 parent 89e256c commit 6b789a2
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 49 deletions.
115 changes: 66 additions & 49 deletions src/ga4gh/vrs/_internal/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,20 +295,37 @@ class ga4gh(_ValueObject.ga4gh):
]


class LengthExpression(_ValueObject):
    """An expression of a DNA, RNA, or protein polymer of known length but unspecified sequence."""

    # The `type` discriminator must name this class. Reusing the
    # 'ReferenceLengthExpression' literal would make the two expression
    # types indistinguishable by their `type` value.
    type: Literal['LengthExpression'] = Field(
        'LengthExpression', description='MUST be "LengthExpression"'
    )
    # Required: either an exact residue count or a Range.
    length: Union[Range, int] = Field(
        ..., description='The number of residues of the expressed sequence.'
    )

    class ga4gh(_ValueObject.ga4gh):
        # Kept in sorted order; the schema validation tests assert this.
        keys = [
            'length',
            'type'
        ]


class ReferenceLengthExpression(_ValueObject):
"""An expression of a length of a sequence from a repeating reference."""
"""An expression sequence derived from a reference."""

type: Literal['ReferenceLengthExpression'] = Field(
'ReferenceLengthExpression', description='MUST be "ReferenceLengthExpression"'
)
length: Union[Range, int] = Field(
..., description='The number of residues in the expressed sequence.'
..., description='The number of residues of the expressed sequence.'
)
sequence: Optional[SequenceString] = Field(
None, description='the Sequence encoded by the Reference Length Expression.'
)
repeatSubunitLength: int = Field(
None, description='The number of residues in the repeat subunit.'
None, description='The number of residues of the repeat subunit.'
)

class ga4gh(_ValueObject.ga4gh):
Expand Down Expand Up @@ -455,25 +472,25 @@ class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
]


# Pairs one variation (an Allele or a Haplotype) with its copy count.
class GenotypeMember(_ValueObject):
"""A class for expressing the count of a specific `MolecularVariation` present
in-trans at a genomic locus represented by a `Genotype`.
"""

type: Literal['GenotypeMember'] = Field('GenotypeMember', description='MUST be "GenotypeMember".')
# Required: an exact integer count or a Range.
count: Union[Range, int] = Field(
..., description='The number of copies of the `variation` at a Genotype locus.'
)
# Required: the variation being counted.
variation: Union[Allele, Haplotype] = Field(
..., description='A MolecularVariation at a Genotype locus.'
)

# NOTE(review): the outer class derives from _ValueObject, yet this nested
# class derives from _Ga4ghIdentifiableObject.ga4gh rather than
# _ValueObject.ga4gh (as LengthExpression does) - confirm this is intended.
class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
# NOTE(review): these keys are not in sorted order, whereas the other
# `keys` lists visible here are - confirm whether ordering matters.
keys = [
'type',
'count',
'variation'
]
# class GenotypeMember(_ValueObject):
# """A class for expressing the count of a specific `MolecularVariation` present
# in-trans at a genomic locus represented by a `Genotype`.
# """
#
# type: Literal['GenotypeMember'] = Field('GenotypeMember', description='MUST be "GenotypeMember".')
# count: Union[Range, int] = Field(
# ..., description='The number of copies of the `variation` at a Genotype locus.'
# )
# variation: Union[Allele, Haplotype] = Field(
# ..., description='A MolecularVariation at a Genotype locus.'
# )
#
# class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
# keys = [
# 'type',
# 'count',
# 'variation'
# ]


class MolecularVariation(RootModel):
Expand All @@ -488,31 +505,31 @@ class MolecularVariation(RootModel):
)


# Groups one or more GenotypeMember entries together with a total copy count.
class Genotype(_VariationBase):
"""A quantified set of _in-trans_ `MolecularVariation` at a genomic locus."""

type: Literal['Genotype'] = Field(
'Genotype',
description='MUST be "Genotype"'
)
# TODO members temporarily typed as List instead of Set + validate unique items
# Required, and must contain at least one member (min_length=1).
members: List[GenotypeMember] = Field(
...,
description='Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus.',
min_length=1,
)
# Required: an exact integer count or a Range.
# NOTE(review): the description's "greater than or equal to the sum of member
# counts" rule is not enforced by anything visible in this class - confirm
# whether a validator exists elsewhere.
count: Union[Range, int] = Field(
...,
description='The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. If greater than the total counts, this implies additional MolecularVariation that are expected to exist but are not explicitly indicated.',
)

class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
# Presumably the identifier type prefix for Genotype - TODO confirm against
# _Ga4ghIdentifiableObject.ga4gh.
prefix = 'GT'
# Listed in sorted order.
keys = [
'count',
'members',
'type'
]
# class Genotype(_VariationBase):
# """A quantified set of _in-trans_ `MolecularVariation` at a genomic locus."""
#
# type: Literal['Genotype'] = Field(
# 'Genotype',
# description='MUST be "Genotype"'
# )
# # TODO members temporarily typed as List instead of Set + validate unique items
# members: List[GenotypeMember] = Field(
# ...,
# description='Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus.',
# min_length=1,
# )
# count: Union[Range, int] = Field(
# ...,
# description='The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts. If greater than the total counts, this implies additional MolecularVariation that are expected to exist but are not explicitly indicated.',
# )
#
# class ga4gh(_Ga4ghIdentifiableObject.ga4gh):
# prefix = 'GT'
# keys = [
# 'count',
# 'members',
# 'type'
# ]


class SequenceExpression(RootModel):
Expand All @@ -534,7 +551,7 @@ class Location(RootModel):


class Variation(RootModel):
root: Union[Allele, CopyNumberChange, CopyNumberCount, Genotype, Haplotype] = Field(
root: Union[Allele, CopyNumberChange, CopyNumberCount, Haplotype] = Field(
...,
json_schema_extra={
'description': 'A representation of the state of one or more biomolecules.'
Expand All @@ -548,7 +565,7 @@ class SystemicVariation(RootModel):
sample, or homologous chromosomes.
"""

root: Union[CopyNumberChange, CopyNumberCount, Genotype] = Field(
root: Union[CopyNumberChange, CopyNumberCount] = Field(
...,
json_schema_extra={
'description': 'A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.'
Expand Down
61 changes: 61 additions & 0 deletions tests/validation/test_vrs_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""test that VRS Python model structures match VRS Schema
"""
import yaml
from pathlib import Path
from ga4gh.vrs import models

# Repository root: this module lives at <root>/tests/validation/.
ROOT_DIR = Path(__file__).parents[2]
VRS_SCHEMA_DIR = ROOT_DIR / 'submodules' / 'vrs' / 'schema'

# Read with an explicit encoding so loading does not depend on the platform's
# locale default.
with open(VRS_SCHEMA_DIR / 'vrs.yaml', encoding='utf-8') as vrs_yaml:
    VRS_SCHEMA = yaml.safe_load(vrs_yaml)

# Names of schema definitions that declare `properties` (object-like classes).
VRS_CONCRETE_CLASSES = set()
# Names of schema definitions that are plain array/integer/string types.
VRS_PRIMITIVES = set()

for vrs_class, cls_def in VRS_SCHEMA['$defs'].items():
    if 'properties' in cls_def:
        VRS_CONCRETE_CLASSES.add(vrs_class)
    # JSON Schema spells its primitive types 'integer' and 'string'; the
    # Python-style names 'int' and 'str' never occur in a schema document.
    elif cls_def.get('type') in ('array', 'integer', 'string'):
        VRS_PRIMITIVES.add(vrs_class)

# Use this to skip testing of not-implemented classes.
# TODO: Remove this once 2.0 models at beta
NOT_IMPLEMENTED = ['Adjacency', 'Haplotype']


def test_schema_models_exist():
    """Test that VRS Python covers the models defined by VRS.

    Every concrete class and primitive collected from the schema, other than
    those listed in ``NOT_IMPLEMENTED``, must be available as a truthy
    attribute of ``ga4gh.vrs.models``.
    """
    for vrs_class in VRS_CONCRETE_CLASSES | VRS_PRIMITIVES:
        if vrs_class in NOT_IMPLEMENTED:
            continue
        # Pass the class name as the assertion message so a failure names the
        # missing model, matching the other tests in this module.
        assert getattr(models, vrs_class, False), vrs_class


def test_schema_class_fields_are_valid():
    """Test that VRS Python model fields match the VRS specification.

    For each implemented concrete class, the set of pydantic field names must
    equal the set of property names declared for that class in the schema.
    """
    for vrs_class in VRS_CONCRETE_CLASSES:
        if vrs_class in NOT_IMPLEMENTED:
            continue
        schema_fields = set(VRS_SCHEMA['$defs'][vrs_class]['properties'])
        pydantic_model = getattr(models, vrs_class)
        # `model_fields` is the pydantic v2 accessor; `__fields__` is deprecated
        # there and emits a deprecation warning on access.
        assert set(pydantic_model.model_fields) == schema_fields, vrs_class


def test_model_keys_are_valid():
    """Test that digest keys on Value Objects are valid and sorted.

    Classes whose schema definition has no ``ga4ghDigest.keys`` entry are
    skipped. For the rest, the model's ``ga4gh.keys`` must contain exactly the
    schema's keys and must already be in sorted order.

    Raises:
        AttributeError: naming the class, if a model lacks ``ga4gh.keys``.
    """
    for vrs_class in VRS_CONCRETE_CLASSES:
        if vrs_class in NOT_IMPLEMENTED:
            continue
        schema_keys = VRS_SCHEMA['$defs'][vrs_class].get('ga4ghDigest', {}).get('keys')
        if schema_keys is None:
            continue
        pydantic_model = getattr(models, vrs_class)
        try:
            pydantic_model_digest_keys = pydantic_model.ga4gh.keys
        except AttributeError as err:
            # Re-raise with the class name, keeping the original error chained
            # so the traceback still shows which attribute was missing.
            raise AttributeError(vrs_class) from err
        assert set(pydantic_model_digest_keys) == set(schema_keys), vrs_class
        assert pydantic_model_digest_keys == sorted(pydantic_model_digest_keys), vrs_class

0 comments on commit 6b789a2

Please sign in to comment.