diff --git a/viewer/download_structures.py b/viewer/download_structures.py index c5191f29..4382491e 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -35,7 +35,7 @@ 'bound_file': ('aligned'), 'cif_info': ('aligned'), 'mtz_info': ('aligned'), - # 'map_info': ('aligned'), + 'map_info': ('aligned'), 'sigmaa_file': ('aligned'), 'diff_file': ('aligned'), 'event_file': ('aligned'), @@ -56,8 +56,9 @@ 'bound_file': {}, # x 'cif_info': {}, # from experiment 'mtz_info': {}, # from experiment + 'map_info': {}, # from experiment 'event_file': {}, # x - 'diff_file': {}, # renamed from diff_file and sigmaa_file + 'diff_file': {}, 'sigmaa_file': {}, }, 'molecules': { @@ -229,6 +230,7 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): filepath = str(Path(settings.MEDIA_ROOT).joinpath(filepath)) if Path(filepath).is_file(): + # strip off the leading parts of path archive_path = str(Path(*Path(filepath).parts[7:])) if _is_mol_or_sdf(filepath): # It's a MOL or SD file. @@ -285,9 +287,13 @@ def _protein_files_zip(zip_contents, ziparchive, error_file): continue for prot, prot_file in files.items(): - if not _add_file_to_zip_aligned(ziparchive, prot.split(":")[0], prot_file): - error_file.write(f'{param},{prot},{prot_file}\n') - prot_errors += 1 + # if it's a list of files (map_info) instead of single file + if not isinstance(prot_file, list): + prot_file = [prot_file] + for f in prot_file: + if not _add_file_to_zip_aligned(ziparchive, prot.split(":")[0], f): + error_file.write(f'{param},{prot},{f}\n') + prot_errors += 1 return prot_errors @@ -606,10 +612,14 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): # getting the param from experiment. 
more data are # coming from there, that's why this is in try # block - # getattr retrieves FieldFile object, hance the .name - zip_contents['proteins'][param][so.code] = getattr( - so.experiment, param - ).name + model_attr = getattr(so.experiment, param) + # getattr retrieves FieldFile object, hence the .name + if isinstance(model_attr, list): + # except map_files, this returns a list of files + zip_contents['proteins'][param][so.code] = model_attr + else: + zip_contents['proteins'][param][so.code] = model_attr.name + except AttributeError: # on the off chance that the data are in site_observation model zip_contents['proteins'][param][so.code] = getattr(so, param).name @@ -686,6 +696,7 @@ def get_download_params(request): 'bound_file', 'cif_info', 'mtz_info', + 'map_info', 'event_file', 'sigmaa_file', 'diff_file', diff --git a/viewer/migrations/0034_experiment_map_info.py b/viewer/migrations/0034_experiment_map_info.py new file mode 100644 index 00000000..333c6988 --- /dev/null +++ b/viewer/migrations/0034_experiment_map_info.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.23 on 2024-01-26 11:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0033_alter_siteobservation_cmpd'), + ] + + operations = [ + migrations.AddField( + model_name='experiment', + name='map_info', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + ] diff --git a/viewer/migrations/0035_alter_experiment_event_map_info.py b/viewer/migrations/0035_alter_experiment_event_map_info.py new file mode 100644 index 00000000..cca1fd89 --- /dev/null +++ b/viewer/migrations/0035_alter_experiment_event_map_info.py @@ -0,0 +1,22 @@ +# Generated by Django 3.2.23 on 2024-01-30 08:09 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0034_experiment_map_info'), + ] + + operations = [ + 
migrations.AlterField( + model_name='experiment', + name='event_map_info', + field=django.contrib.postgres.fields.ArrayField( + base_field=models.FileField(max_length=255, upload_to=''), + null=True, + size=None, + ), + ), + ] diff --git a/viewer/migrations/0036_remove_experiment_map_info.py b/viewer/migrations/0036_remove_experiment_map_info.py new file mode 100644 index 00000000..5fd950ac --- /dev/null +++ b/viewer/migrations/0036_remove_experiment_map_info.py @@ -0,0 +1,16 @@ +# Generated by Django 3.2.23 on 2024-01-30 08:12 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0035_alter_experiment_event_map_info'), + ] + + operations = [ + migrations.RemoveField( + model_name='experiment', + name='map_info', + ), + ] diff --git a/viewer/migrations/0037_rename_event_map_info_experiment_map_info.py b/viewer/migrations/0037_rename_event_map_info_experiment_map_info.py new file mode 100644 index 00000000..049a3383 --- /dev/null +++ b/viewer/migrations/0037_rename_event_map_info_experiment_map_info.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.23 on 2024-01-30 08:12 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0036_remove_experiment_map_info'), + ] + + operations = [ + migrations.RenameField( + model_name='experiment', + old_name='event_map_info', + new_name='map_info', + ), + ] diff --git a/viewer/models.py b/viewer/models.py index 62b44044..0feb65d3 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -193,7 +193,7 @@ class Experiment(models.Model): cif_info = models.FileField( upload_to="target_loader_data/", null=True, max_length=255 ) - event_map_info = ArrayField(models.FileField(), null=True) + map_info = ArrayField(models.FileField(max_length=255), null=True) type = models.PositiveSmallIntegerField(null=True) pdb_sha256 = models.TextField(null=True) compounds = models.ManyToManyField( diff --git a/viewer/serializers.py 
b/viewer/serializers.py index 807d6ffb..7ae642cc 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -832,6 +832,7 @@ class DownloadStructuresSerializer(serializers.Serializer): diff_file = serializers.BooleanField(default=False) event_file = serializers.BooleanField(default=False) sigmaa_file = serializers.BooleanField(default=False) + map_info = serializers.BooleanField(default=False) sdf_info = serializers.BooleanField(default=False) single_sdf_file = serializers.BooleanField(default=False) metadata_info = serializers.BooleanField(default=False) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 28286930..f9656edf 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -5,11 +5,12 @@ import os import tarfile import uuid +from collections.abc import Callable from dataclasses import dataclass, field from enum import Enum from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, Dict, Iterable, List, Optional, TypeVar +from typing import Any, Dict, Iterable, List, Optional, Tuple, TypeVar import yaml from celery import Task @@ -45,7 +46,7 @@ # data that goes to tables are in the following files # assemblies and xtalforms -XTALFORMS_FILE = "crystalforms.yaml" +XTALFORMS_FILE = "assemblies.yaml" # target name, nothing else CONFIG_FILE = "config*.yaml" @@ -313,12 +314,20 @@ def wrapper_create_objects( obj = None try: - obj, new = instance_data.model_class.filter_manager.by_target( - self.target - ).get_or_create( - **instance_data.fields, - defaults=instance_data.defaults, - ) + if instance_data.fields: + obj, new = instance_data.model_class.filter_manager.by_target( + self.target + ).get_or_create( + **instance_data.fields, + defaults=instance_data.defaults, + ) + else: + # no unique field requirements, just create new object + obj = instance_data.model_class( + **instance_data.defaults, + ) + obj.save() + new = True logger.debug( "%s object %s created", 
def validate_map_files(
    self,
    key: str,
    obj_identifier: str,
    file_struct: list,
) -> list[str]:
    """Validate list of panddas event files.

    Special case of file validation, kept apart from the main
    validation method (mainly because of typing).

    Each entry is expected to be a dict; its file name and optional
    hash are extracted with ``_check_file`` and then handed to
    ``_check_file_hash``. Every problem is written to the report at
    WARNING level, nothing is raised.

    Args:
        key: metadata key the entries were read from, used in messages.
        obj_identifier: name of the owning object, used in messages.
        file_struct: list of file entries. ``None`` (key present but
            empty) is treated as an empty list and a single mapping is
            treated as a one-element list.

    Returns:
        File names in input order. Entries without a usable file name
        are skipped; a name is still returned when the hash/existence
        check logged a warning for it.
    """

    def logfunc(_, message):
        self.report.log(Level.WARNING, message)

    if file_struct is None:
        # key exists in the metadata but holds no value
        return []

    if isinstance(file_struct, dict):
        # a lone entry given as a mapping (the shape shown in the
        # process_experiment docstring example); iterating it directly
        # would walk its keys instead of the entry itself
        file_struct = [file_struct]

    result = []
    for item in file_struct:
        if not isinstance(item, dict):
            # _check_file needs a mapping; skip instead of crashing
            logfunc(
                key,
                "{}: malformed entry in {}, expected a dict".format(
                    obj_identifier,
                    key,
                ),
            )
            continue

        fname, file_hash = self._check_file(item, obj_identifier, key, logfunc)
        if not fname:
            continue

        self._check_file_hash(obj_identifier, key, fname, file_hash, logfunc)
        result.append(fname)

    return result
# this is probably the list of panddas event files, don't - # need them here - # although.. should i validate them here nevertheless? - # i'd have to do this on copy otherwise.. + # probably panddas files here continue # file key should go to result dict no matter what result[key] = filename logger.debug("Adding key %s: %s", key, filename) - # filename resolved, check if exists and if given, hash - file_path = self.raw_data.joinpath(filename) - if file_path.is_file(): - if file_hash and file_hash != calculate_sha256(file_path): - logfunc(key, "Invalid hash for file {}".format(filename)) - else: - logfunc( - key, - "{} referenced in {}: {} but not found in archive".format( - key, - METADATA_FILE, - obj_identifier, - ), - ) - files = [] for f in list(required) + list(recommended): try: @@ -564,11 +578,59 @@ def logfunc(key, message): METADATA_FILE, ), ) - files.append(None) + files.append(None) # type: ignore [arg-type] logger.debug("Returning files: %s", files) - return files + # memo to self: added type ignore directives to return line + # below and append line above because after small refactoring, + # mypy all of a sudden started throwing errors on both of + # these. the core of its grievance is that it expects the + # return type to be list[str]. 
no idea why, function signature + # clearly defines it as list[str | None] + + return files # type: ignore [return-value] + + def _check_file( + self, + value: dict, + obj_identifier: str, + key: str, + logfunc: Callable, + ) -> Tuple[str | None, str | None]: + file_hash = value.get("sha256", None) + try: + filename = value["file"] + except KeyError: + # this is rather unexpected, haven't seen it yet + filename = None + logfunc( + key, + "{}: malformed dict, key 'file' missing".format(obj_identifier), + ) + return filename, file_hash + + def _check_file_hash( + self, + obj_identifier: str, + key: str, + filename: str, + file_hash: str | None, + logfunc: Callable, + ) -> None: + file_path = self.raw_data.joinpath(filename) + if file_path.is_file(): + if file_hash and file_hash != calculate_sha256(file_path): + logfunc(key, "Invalid hash for file {}".format(filename)) + else: + logfunc( + key, + "{} referenced in {}: {} but not found in archive".format( + key, + METADATA_FILE, + obj_identifier, + ), + ) @create_objects(depth=1) def process_experiment( @@ -589,9 +651,15 @@ def process_experiment( 'xtal_mtz': { 'file': 'upload_1/crystallographic_files/5rgs/5rgs.mtz', 'sha256': sha , - } - }, + }, + 'panddas_event_files': { + 'file': .ccp4, + 'sha256': sha , + 'model': '1', chain: B, res: 203, index: 1, bdc: 0.23 + }, 'status': 'new', + }, + } ) @@ -619,13 +687,24 @@ def process_experiment( ) = self.validate_files( obj_identifier=experiment_name, file_struct=data["crystallographic_files"], - required=("xtal_pdb",), recommended=( + "xtal_pdb", "xtal_mtz", "ligand_cif", ), ) + try: + panddas_files = data["crystallographic_files"]["panddas_event_files"] + except KeyError: + panddas_files = [] + + map_info_files = self.validate_map_files( + key="panddas_event_files", + obj_identifier=experiment_name, + file_struct=panddas_files, + ) + dtype = extract(key="type") if dtype == "manual": @@ -640,13 +719,16 @@ def process_experiment( dstatus = extract(key="status") - if dstatus 
== "new": - status = 0 - elif dstatus == "deprecated": - status = 1 - elif dstatus == "superseded": - status = 2 - else: + status_codes = { + "new": 0, + "deprecated": 1, + "superseded": 2, + "unchanged": 3, + } + + try: + status = status_codes[dstatus] + except KeyError: status = -1 self.report.log( Level.FATAL, f"Unexpected status '{dstatus}' for {experiment_name}" @@ -660,6 +742,11 @@ def process_experiment( "experiment_upload": self.experiment_upload, "code": experiment_name, } + + map_info_paths = [] + if map_info_files: + map_info_paths = [str(self._get_final_path(k)) for k in map_info_files] + defaults = { "status": status, "version": version, @@ -667,6 +754,7 @@ def process_experiment( "pdb_info": str(self._get_final_path(pdb_info)), "mtz_info": str(self._get_final_path(mtz_info)), "cif_info": str(self._get_final_path(cif_info)), + "map_info": map_info_paths, # this doesn't seem to be present # pdb_sha256: } @@ -730,14 +818,14 @@ def process_compound( ) return None - fields = { + defaults = { "smiles": smiles, + "compound_code": data.get("compound_code", None), } - defaults = {"compound_code": data.get("compound_code", None)} return ProcessedObject( model_class=Compound, - fields=fields, + fields={}, defaults=defaults, key=protein_name, ) @@ -1037,11 +1125,16 @@ def process_xtalform_site( "residues": residues, } + index_data = { + "residues": residues, + } + return ProcessedObject( model_class=XtalformSite, fields=fields, defaults=defaults, key=xtalform_site_name, + index_data=index_data, ) @create_objects(depth=5) @@ -1132,6 +1225,7 @@ def process_site_observation( ), ) + logger.debug('looking for ligand_mol: %s', ligand_mol) mol_data = None if ligand_mol: try: @@ -1141,7 +1235,7 @@ def process_site_observation( encoding="utf-8", ) as f: mol_data = f.read() - except TypeError: + except (TypeError, FileNotFoundError): # this site observation doesn't have a ligand. 
perfectly # legitimate case pass @@ -1413,7 +1507,7 @@ def process_bundle(self): # key for xtal sites objects? xtalform_site_by_tag = {} for val in xtalform_sites_objects.values(): # pylint: disable=no-member - for k in val.instance.residues: + for k in val.index_data["residues"]: xtalform_site_by_tag[k] = val.instance site_observation_objects = self.process_site_observation(