From 7c23807c3365e7d7c95de2d32b09d4c26a50e5e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20Jer=C5=A1e?=
Date: Mon, 18 Mar 2024 10:40:17 +0100
Subject: [PATCH] Add variants related models

---
Review notes (this text sits after the "---" marker, so it is not part of
the commit message):

- Removed the debugging print() calls from query.py and data.py. data.py
  contained nothing else, so it is no longer touched by this patch.
- base.py: _dehydrate_resources() keeps calling self._dehydrate_resources()
  for list elements. Calling the method on the element itself fails for
  lists of plain values (ints, strings, dicts).
- resolwe.py: dropped the second, duplicate ``Variant: "variant"`` entry in
  resource_query_mapping.
- resources/__init__.py: the autoclass directive now names the existing
  ``Variant`` class instead of ``Variants``.
- variants.py: Variant._field_changed() tolerates a missing annotation, the
  transcripts setter only tags dict payloads, and the __repr__ f-strings
  interpolate real fields again.
- query.py: the comment in VariantCallQuery._add_filter() now matches what
  the method does.
- Hunk headers and the diffstat below were recomputed. The blob ids on the
  "index" lines are carried over from the previous revision and must be
  regenerated (git format-patch) before sending.

 docs/CHANGELOG.rst              |   1 +
 src/resdk/query.py              |  23 +++-
 src/resdk/resolwe.py            |   8 ++
 src/resdk/resources/__init__.py |   9 ++
 src/resdk/resources/base.py     |  92 ++++++++------
 src/resdk/resources/sample.py   |  36 +++++-
 src/resdk/resources/variants.py | 205 ++++++++++++++++++++++++++++++++
 7 files changed, 340 insertions(+), 34 deletions(-)
 create mode 100644 src/resdk/resources/variants.py

diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst
index cf8b2f16..7c32ecf4 100644
--- a/docs/CHANGELOG.rst
+++ b/docs/CHANGELOG.rst
@@ -16,6 +16,7 @@ Changed
 Added
 -----
 - Add ``restart`` method to the ``Data`` resource
+- Add variants related models
 
 Fixed
 -----
diff --git a/src/resdk/query.py b/src/resdk/query.py
index 3a0ceb97..d3bff4db 100644
--- a/src/resdk/query.py
+++ b/src/resdk/query.py
@@ -175,7 +175,8 @@ def _add_filter(self, filter_):
         """Add filtering parameters."""
         for key, value in filter_.items():
             # 'sample' is called 'entity' in the backend.
-            key = key.replace("sample", "entity")
+            if not self.resource.__name__.startswith("Variant"):
+                key = key.replace("sample", "entity")
             value = self._dehydrate_resources(value)
             if self._non_string_iterable(value):
                 value = ",".join(map(str, value))
@@ -400,6 +401,26 @@ def from_path(self, full_path: str) -> "AnnotationField":
         return self.get(name=field_name, group__name=group_name)
 
 
+class VariantCallQuery(ResolweQuery):
+    """Do not translate 'sample' to 'entity'."""
+
+    def _add_filter(self, filter_):
+        """Add filtering parameters."""
+        for key, value in filter_.items():
+            # Keys are passed through unchanged (no 'sample' -> 'entity' rename).
+            value = self._dehydrate_resources(value)
+            if self._non_string_iterable(value):
+                value = ",".join(map(str, value))
+            if self.resource.query_method == "GET":
+                self._filters[key].append(value)
+            elif self.resource.query_method == "POST":
+                self._filters[key] = value
+            else:
+                raise NotImplementedError(
+                    "Unsupported query_method: {}".format(self.resource.query_method)
+                )
+
+
 class AnnotationValueQuery(ResolweQuery):
     """Populate Annotation fields with a single query."""
 
diff --git a/src/resdk/resolwe.py b/src/resdk/resolwe.py
index 63268dc1..4a897fb3 100644
--- a/src/resdk/resolwe.py
+++ b/src/resdk/resolwe.py
@@ -47,6 +47,10 @@
     Relation,
     Sample,
     User,
+    Variant,
+    VariantAnnotation,
+    VariantCall,
+    VariantExperiment,
 )
 from .resources.base import BaseResource
 from .resources.kb import Feature, Mapping
@@ -114,6 +118,10 @@ class Resolwe:
     resource_query_mapping = {
         AnnotationField: "annotation_field",
         AnnotationValue: "annotation_value",
+        Variant: "variant",
+        VariantAnnotation: "variant_annotation",
+        VariantExperiment: "variant_experiment",
+        VariantCall: "variant_calls",
         Data: "data",
         Collection: "collection",
         Sample: "sample",
diff --git a/src/resdk/resources/__init__.py b/src/resdk/resources/__init__.py
index 061a7964..d05d38cb 100644
--- a/src/resdk/resources/__init__.py
+++ b/src/resdk/resources/__init__.py
@@ -54,6 +54,10 @@
    :members:
    :inherited-members:
 
+.. autoclass:: resdk.resources.Variant
+   :members:
+   :inherited-members:
+
 .. autoclass:: resdk.resources.User
    :members:
    :inherited-members:
@@ -102,6 +106,7 @@
 from .relation import Relation
 from .sample import Sample
 from .user import Group, User
+from .variants import Variant, VariantAnnotation, VariantCall, VariantExperiment
 
 __all__ = (
     "AnnotationField",
@@ -117,4 +122,8 @@
     "Process",
     "Relation",
     "User",
+    "Variant",
+    "VariantAnnotation",
+    "VariantCall",
+    "VariantExperiment",
 )
diff --git a/src/resdk/resources/base.py b/src/resdk/resources/base.py
index ded2ed7d..5ebddba2 100644
--- a/src/resdk/resources/base.py
+++ b/src/resdk/resources/base.py
@@ -28,6 +28,7 @@ class BaseResource:
     full_search_paramater = None
     delete_warning_single = "Do you really want to delete {}?[yN]"
     delete_warning_bulk = "Do you really want to delete {} objects?[yN]"
+    nested_serialize = False
 
     READ_ONLY_FIELDS = ("id",)
     UPDATE_PROTECTED_FIELDS = ()
@@ -77,44 +78,66 @@ def update(self):
         response = self.api(self.id).get()
         self._update_fields(response)
 
+    def __hash__(self):
+        """Return hash of the object."""
+        return hash(self.id)
+
+    def _serialize(self):
+        """Serialize the object.
+
+        By default, return the dictionary only with id or slug. The slug is used if id
+        does not exist yet.
+        """
+        if self.nested_serialize:
+            return self._nested_serialize()
+        else:
+            return {"id": self.id} if self.id else {"slug": self.slug}
+
+    def _nested_serialize(self):
+        """Nested serialize the object."""
+        return {"id": self.id} | {
+            field_name: self._dehydrate_resources(getattr(self, field_name))
+            for field_name in self.WRITABLE_FIELDS
+            if self._field_changed(field_name)
+        }
+
     def _dehydrate_resources(self, obj):
-        """Iterate through object and replace all objects with their ids."""
-        # Prevent circular imports:
-        from .descriptor import DescriptorSchema
-        from .process import Process
+        """Return the serialized obj.
 
-        if isinstance(obj, DescriptorSchema) or isinstance(obj, Process):
-            # Slug can only be given at create requests (id not present yet)
-            if not self.id:
-                return {"slug": obj.slug}
+        Special attention is given to the following cases:
+        - obj is a BaseResource: return the serialized object.
+        - obj is a list: return a list of serialized objects.
+        - obj is a dict: replace values with serialized objects.
 
-            return {"id": obj.id}
+        Otherwise, return the object as is.
+        """
         if isinstance(obj, BaseResource):
-            return {"id": obj.id}
+            return obj._serialize()
         if isinstance(obj, list):
             return [self._dehydrate_resources(element) for element in obj]
         if isinstance(obj, dict):
             return {key: self._dehydrate_resources(value) for key, value in obj.items()}
-
         return obj
 
-    def save(self):
-        """Save resource to the server."""
+    def _field_changed(self, field_name):
+        """Check if local field value is different from the server."""
+        current_value = getattr(self, field_name, None)
+        original_value = self._original_values.get(field_name, None)
 
-        def field_changed(field_name):
-            """Check if local field value is different from the server."""
-            current_value = getattr(self, field_name, None)
-            original_value = self._original_values.get(field_name, None)
+        # The default implementation only checks for equality, since we do not support
+        # nested updates in most cases.
+        if isinstance(current_value, BaseResource) and original_value:
+            # TODO: Check that current and original are instances of the same resource class
+            return current_value.id != original_value.get("id", None)
+        else:
+            return current_value != original_value
 
-            if isinstance(current_value, BaseResource) and original_value:
-                # TODO: Check that current and original are instances of the same resource class
-                return current_value.id != original_value.get("id", None)
-            else:
-                return current_value != original_value
+    def save(self):
+        """Save resource to the server."""
 
         def assert_fields_unchanged(field_names):
             """Assert that fields in ``field_names`` were not changed."""
-            changed_fields = [name for name in field_names if field_changed(name)]
+            changed_fields = [name for name in field_names if self._field_changed(name)]
 
             if changed_fields:
                 msg = "Not allowed to change read only fields {}".format(
@@ -129,7 +152,7 @@ def assert_fields_unchanged(field_names):
 
         payload = {}
         for field_name in self.WRITABLE_FIELDS:
-            if field_changed(field_name):
+            if self._field_changed(field_name):
                 payload[field_name] = self._dehydrate_resources(
                     getattr(self, field_name)
                 )
@@ -214,18 +237,23 @@ def __eq__(self, obj):
         else:
             return False
 
-    def _resource_setter(self, payload, resource, field):
-        """Set ``resource`` with ``payload`` on ``field``."""
+    def _get_resourse(self, payload, resource):
+        """Get ``resource`` from ``payload``."""
         if isinstance(payload, resource):
-            setattr(self, field, payload)
+            return payload
         elif isinstance(payload, dict):
-            setattr(self, field, resource(resolwe=self.resolwe, **payload))
+            return resource(resolwe=self.resolwe, **payload)
         elif isinstance(payload, int):
-            setattr(self, field, resource.fetch_object(self.resolwe, id=payload))
+            return resource.fetch_object(self.resolwe, id=payload)
         elif isinstance(payload, str):
-            setattr(self, field, resource.fetch_object(self.resolwe, slug=payload))
-        else:
-            setattr(self, field, payload)
+            return resource.fetch_object(self.resolwe, slug=payload)
+        elif isinstance(payload, list):
+            return [self._get_resourse(item, resource) for item in payload]
+        return payload
+
+    def _resource_setter(self, payload, resource, field):
+        """Set ``resource`` with ``payload`` on ``field``."""
+        setattr(self, field, self._get_resourse(payload, resource))
 
 
 class BaseResolweResource(BaseResource):
diff --git a/src/resdk/resources/sample.py b/src/resdk/resources/sample.py
index 25cf7c14..8089ffaf 100644
--- a/src/resdk/resources/sample.py
+++ b/src/resdk/resources/sample.py
@@ -1,7 +1,7 @@
 """Sample resource."""
 
 import logging
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 from resdk.exceptions import ResolweServerError
 from resdk.shortcuts.sample import SampleUtilsMixin
@@ -9,6 +9,7 @@
 from ..utils.decorators import assert_object_exists
 from .background_task import BackgroundTask
 from .collection import BaseCollection, Collection
+from .variants import Variant
 
 if TYPE_CHECKING:
     from .annotations import AnnotationValue
@@ -39,6 +40,10 @@ def __init__(self, resolwe, **model_data):
         self._background = None
         #: is this sample background to any other sample?
         self._is_background = None
+        #: list of ``Variant`` objects attached to the sample
+        self._variants = None
+        #: list of ``VariantExperiment`` objects attached to the sample
+        self._experiments = None
 
         super().__init__(resolwe, **model_data)
 
@@ -48,6 +53,8 @@ def update(self):
         self._relations = None
         self._background = None
         self._is_background = None
+        self._variants = None
+        self._experiments = None
 
         super().update()
 
@@ -60,6 +67,33 @@ def data(self):
 
         return self._data
 
+    @property
+    def experiments(self):
+        """Get experiments."""
+        if self._experiments is None:
+            self._experiments = self.resolwe.variant_experiment.filter(
+                variant_calls__sample=self.id
+            )
+        return self._experiments
+
+    @property
+    def latest_experiment(self):
+        """Get latest experiment."""
+        return self.experiments.filter(ordering="-timestamp", limit=1)[0]
+
+    @property
+    def variants(self):
+        """Get variants."""
+        if self._variants is None:
+            self._variants = self.resolwe.variant.filter(variant_calls__sample=self.id)
+        return self._variants
+
+    def variants_by_experiment(self, experiment):
+        """Get variants for sample detected by the given experiment."""
+        return self.resolwe.variant.filter(
+            variant_calls__sample=self.id, variant_calls__experiment=experiment.id
+        )
+
     @property
     def collection(self):
         """Get collection."""
diff --git a/src/resdk/resources/variants.py b/src/resdk/resources/variants.py
new file mode 100644
index 00000000..e83405cc
--- /dev/null
+++ b/src/resdk/resources/variants.py
@@ -0,0 +1,205 @@
+"""Variant resources."""
+
+from typing import Any
+
+from .base import BaseResource
+
+
+class Variant(BaseResource):
+    """ResolweBio Variant resource."""
+
+    endpoint = "variant"
+    nested_serialize = True
+    READ_ONLY_FIELDS = BaseResource.READ_ONLY_FIELDS
+    WRITABLE_FIELDS = (
+        "species",
+        "genome_assembly",
+        "chromosome",
+        "position",
+        "reference",
+        "alternative",
+        "annotation",
+    )
+
+    @property
+    def annotation(self):
+        """Get the annotation for this variant."""
+        return self._annotation
+
+    @annotation.setter
+    def annotation(self, payload):
+        """Set annotation."""
+        if isinstance(payload, dict):
+            payload["variant"] = self
+        self._resource_setter(payload, VariantAnnotation, "_annotation")
+
+    def _field_changed(self, field_name):
+        """Detect changes to nested field annotation."""
+        if field_name == "annotation" and self.annotation is not None:
+            return self.annotation.has_changes()
+        return super()._field_changed(field_name)
+
+    def __repr__(self) -> str:
+        """Return string representation."""
+        return (
+            f"Variant <id: {self.id}, chr: {self.chromosome}, pos: {self.position}, "
+            f"ref: {self.reference}, alt: {self.alternative}>"
+        )
+
+
+class VariantAnnotation(BaseResource):
+    """VariantAnnotation resource."""
+
+    endpoint = "variant_annotations"
+    nested_serialize = True
+
+    READ_ONLY_FIELDS = BaseResource.READ_ONLY_FIELDS + ("variant",)
+    WRITABLE_FIELDS = (
+        "type",
+        "clinical_diagnosis",
+        "clinical_significance",
+        "dbsnp_id",
+        "clinvar_id",
+        "data",
+        "transcripts",
+    )
+
+    @property
+    def transcripts(self):
+        """Get the transcripts for this variant annotation."""
+        return self._transcripts
+
+    @transcripts.setter
+    def transcripts(self, payload):
+        """Set transcripts."""
+        if payload:
+            for transcript in payload:
+                # Only raw payload dicts need the back-reference added.
+                if isinstance(transcript, dict):
+                    transcript["variant_annotation"] = self
+        self._resource_setter(payload, VariantAnnotationTranscript, "_transcripts")
+
+    def _field_changed(self, field_name):
+        """Detect changes to nested field transcripts."""
+        if self.id is None:
+            return True
+        if field_name == "transcripts" and self.transcripts:
+            return any(transcript.has_changes() for transcript in self.transcripts)
+        else:
+            return super()._field_changed(field_name)
+
+    def has_changes(self) -> bool:
+        """Check if the object has changes."""
+        return self.id is None or any(
+            self._field_changed(field_name) for field_name in self.WRITABLE_FIELDS
+        )
+
+    def __repr__(self) -> str:
+        """Return string representation."""
+        return f"VariantAnnotation <id: {self.id}, type: {self.type}>"
+
+
+class VariantAnnotationTranscript(BaseResource):
+    """VariantAnnotationTranscript resource."""
+
+    nested_serialize = True
+    endpoint = "variant_annotation_transcript"
+    READ_ONLY_FIELDS = BaseResource.READ_ONLY_FIELDS + ("variant_annotation",)
+    WRITABLE_FIELDS = (
+        "annotation",
+        "gene",
+        "annotation_impact",
+        "transcript_id",
+        "canonical",
+    )
+
+    def has_changes(self) -> bool:
+        """Check if the object has changes."""
+        return self.id is None or any(
+            self._field_changed(field_name) for field_name in self.WRITABLE_FIELDS
+        )
+
+    def _nested_serialize(self):
+        """Nested serialize the object."""
+        return {
+            field_name: self._dehydrate_resources(getattr(self, field_name))
+            for field_name in self.WRITABLE_FIELDS
+        }
+
+
+class VariantExperiment(BaseResource):
+    """Variant experiment resource."""
+
+    endpoint = "variant_experiment"
+
+    READ_ONLY_FIELDS = BaseResource.READ_ONLY_FIELDS + (
+        "variant_data_source",
+        "timestamp",
+        "contributor",
+    )
+
+    def __repr__(self) -> str:
+        """Return string representation."""
+        return f"VariantExperiment <id: {self.id}>"
+
+
+class VariantCall(BaseResource):
+    """VariantCall resource."""
+
+    endpoint = "variant_calls"
+
+    READ_ONLY_FIELDS = BaseResource.READ_ONLY_FIELDS + (
+        "sample_id",
+        "variant_id",
+        "quality",
+        "depth_norm_quality",
+        "alternative_allele_depth",
+        "depth",
+        "genotype",
+        "genotype_quality",
+        "filter",
+        "data_id",
+        "experiment_id",
+    )
+
+    def __init__(self, resolwe, **model_data: Any):
+        """Initialize object."""
+        super().__init__(resolwe, **model_data)
+        self._data = None
+        self._sample = None
+        self._experiment = None
+        self._variant = None
+
+    @property
+    def data(self):
+        """Get the data object for this variant call."""
+        if self._data is None:
+            self._data = self.resolwe.data.get(self.data_id)
+        return self._data
+
+    @property
+    def sample(self):
+        """Get the sample object for this variant call."""
+        if self._sample is None:
+            self._sample = self.resolwe.sample.get(self.sample_id)
+        return self._sample
+
+    @property
+    def experiment(self):
+        """Get the experiment object for this variant call."""
+        if self._experiment is None:
+            self._experiment = self.resolwe.variant_experiment.get(
+                id=self.experiment_id
+            )
+        return self._experiment
+
+    @property
+    def variant(self):
+        """Get the variant object for this variant call."""
+        if self._variant is None:
+            self._variant = self.resolwe.variant.get(id=self.variant_id)
+        return self._variant
+
+    def __repr__(self) -> str:
+        """Return string representation."""
+        return f"VariantCall <id: {self.id}>"