diff --git a/nmdc_schema/migrators/migrator_from_11_3_0_to_11_4_0.py b/nmdc_schema/migrators/migrator_from_11_3_0_to_11_4_0.py new file mode 100644 index 0000000000..a29d35c1de --- /dev/null +++ b/nmdc_schema/migrators/migrator_from_11_3_0_to_11_4_0.py @@ -0,0 +1,35 @@ +from nmdc_schema.migrators.migrator_base import MigratorBase + + +class Migrator(MigratorBase): + r"""Migrates a database from conforming to schema version 11.3.0, to conforming to schema version 11.4.0.""" + + _from_version = "11.3.0" + _to_version = "11.4.0" + + def upgrade(self): + r"""Migrates the database from conforming to the original schema, to conforming to the new schema, by passing `set_metab_analysis_category` to the adapter's `process_each_document` for `workflow_execution_set`.""" + + self.adapter.process_each_document("workflow_execution_set", [self.set_metab_analysis_category]) + + def set_metab_analysis_category(self, workflow: dict) -> dict: + r""" + If the workflow execution record is of the type "nmdc:MetabolomicsAnalysis", add the field `metabolomics_analysis_category` to it: + assign the value "gc_ms_metabolomics" if the record has a `has_metabolite_identifications` field (even an empty one), or the value + "lc_ms_lipidomics" if it does not. Records of any other type are left unchanged. The record is modified in place and returned. 
+ + >>> m = Migrator() + >>> m.set_metab_analysis_category({'id': 123, 'type': 'nmdc:MetabolomicsAnalysis', 'has_metabolite_identifications': []}) + {'id': 123, 'type': 'nmdc:MetabolomicsAnalysis', 'has_metabolite_identifications': [], 'metabolomics_analysis_category': 'gc_ms_metabolomics'} + >>> m.set_metab_analysis_category({'id': 123, 'type': 'nmdc:MetabolomicsAnalysis'}) # does not have has_metabolite_identifications field, therefore it's a lipid analysis + {'id': 123, 'type': 'nmdc:MetabolomicsAnalysis', 'metabolomics_analysis_category': 'lc_ms_lipidomics'} + >>> m.set_metab_analysis_category({'id': 123, 'type': 'nmdc:Metaproteomics'}) # not a metabolomics analysis + {'id': 123, 'type': 'nmdc:Metaproteomics'} + """ + + if workflow["type"] == "nmdc:MetabolomicsAnalysis": + if "has_metabolite_identifications" in workflow: + workflow["metabolomics_analysis_category"] = "gc_ms_metabolomics" + else: + workflow["metabolomics_analysis_category"] = "lc_ms_lipidomics" + return workflow \ No newline at end of file diff --git a/src/data/invalid/Database-MetabolomicsAnalysis-no_metabolomics_category.yaml b/src/data/invalid/Database-MetabolomicsAnalysis-no_metabolomics_category.yaml new file mode 100644 index 0000000000..60c22fce4c --- /dev/null +++ b/src/data/invalid/Database-MetabolomicsAnalysis-no_metabolomics_category.yaml @@ -0,0 +1,16 @@ +# this example is invalid because the records in the workflow execution set do not have a metabolomics_analysis_category value +workflow_execution_set: + - id: nmdc:wfmb-99-ABCDEF.1 + name: Metabolomics Analysis Activity for nmdc:wfmb-99-ABCDEF.1 + started_at_time: '2021-08-05T14:48:51+00:00' + ended_at_time: '2021-09-15T10:13:20+00:00' + execution_resource: NERSC-Cori + was_informed_by: nmdc:dgms-11-djad84 + git_url: https://example.org/WorkflowExecutionActivity + has_input: + - nmdc:dobj-99-xxxxxx1 + - nmdc:dobj-99-xxxxxx2 + has_output: + - nmdc:dobj-99-xxxxxx3 + - nmdc:dobj-99-xxxxxx4 + type: nmdc:MetabolomicsAnalysis \ No 
newline at end of file diff --git a/src/data/invalid/Database-invalid_calibration_slot.yaml b/src/data/invalid/Database-invalid_calibration_slot.yaml index 6a8511d520..d7ab10503b 100644 --- a/src/data/invalid/Database-invalid_calibration_slot.yaml +++ b/src/data/invalid/Database-invalid_calibration_slot.yaml @@ -52,3 +52,4 @@ workflow_execution_set: git_url: https://github.com/microbiomedata/metaMS started_at_time: '2021-01-07T23:54:40Z' was_informed_by: nmdc:dgms-13-122e4240 + metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml index e1e87cb9c3..b63e97bccd 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid-has-slot-used.yaml @@ -12,3 +12,4 @@ has_input: has_output: - nmdc:dobj-11-547rwa36 - nmdc:dobj-11-547rwa37 +metabolomics_analysis_category: gc_ms_metabolomics \ No newline at end of file diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid-missing_execution_resource.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid-missing_execution_resource.yaml index 62744bbfec..33918ef10b 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid-missing_execution_resource.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid-missing_execution_resource.yaml @@ -9,4 +9,5 @@ has_input: has_output: - nmdc:dobj-11-123d - nmdc:dobj-11-34dj4 -was_informed_by: nmdc:omprc-00-123456 \ No newline at end of file +was_informed_by: nmdc:omprc-00-123456 +metabolomics_analysis_category: gc_ms_metabolomics \ No newline at end of file diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid_execution_resource_not_in_enum.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid_execution_resource_not_in_enum.yaml index ea220f5e55..1ed9e6fd6b 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid_execution_resource_not_in_enum.yaml +++ 
b/src/data/invalid/MetabolomicsAnalysis-invalid_execution_resource_not_in_enum.yaml @@ -11,3 +11,4 @@ has_output: - nmdc:dobj-11-123d - nmdc:dobj-11-34dj4 was_informed_by: nmdc:omprc-11-284u7d +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid_id-1.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid_id-1.yaml index aa1cb4b9a9..7d19238538 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid_id-1.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid_id-1.yaml @@ -12,3 +12,4 @@ has_output: - nmdc:dobj-11-ndgg7b37 - nmdc:dobj-11-ndgg7b38 type: nmdc:MetabolomicsAnalysis +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid_id-2.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid_id-2.yaml index 36986fd0d0..90588b6aec 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid_id-2.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid_id-2.yaml @@ -12,3 +12,4 @@ has_output: - nmdc:dobj-11-ndgg7b37 - nmdc:dobj-11-ndgg7b38 type: nmdc:MetabolomicsAnalysis +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid_id-3.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid_id-3.yaml index 4a245d021e..f135852dd0 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid_id-3.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid_id-3.yaml @@ -12,3 +12,4 @@ has_output: - nmdc:dobj-11-ndgg7b37 - nmdc:dobj-11-ndgg7b38 type: nmdc:MetabolomicsAnalysis +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid_id-4.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid_id-4.yaml index d62580be1a..d46ba01002 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid_id-4.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid_id-4.yaml @@ -12,3 +12,4 @@ has_output: - nmdc:dobj-11-ndgg7b37 - nmdc:dobj-11-ndgg7b38 type: nmdc:MetabolomicsAnalysis 
+metabolomics_analysis_category: gc_ms_metabolomics \ No newline at end of file diff --git a/src/data/invalid/MetabolomicsAnalysis-invalid_no_informed_by.yaml b/src/data/invalid/MetabolomicsAnalysis-invalid_no_informed_by.yaml index eef0f66693..ce0118d08a 100644 --- a/src/data/invalid/MetabolomicsAnalysis-invalid_no_informed_by.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-invalid_no_informed_by.yaml @@ -11,3 +11,4 @@ has_output: - nmdc:dobj-11-ndgg7b37 - nmdc:dobj-11-ndgg7b38 type: nmdc:MetabolomicsAnalysis +metabolomics_analysis_category: gc_ms_metabolomics \ No newline at end of file diff --git a/src/data/invalid/MetabolomicsAnalysis-metab_quantified.yaml b/src/data/invalid/MetabolomicsAnalysis-metab_quantified.yaml index 0ba4fa280c..2ef839591f 100644 --- a/src/data/invalid/MetabolomicsAnalysis-metab_quantified.yaml +++ b/src/data/invalid/MetabolomicsAnalysis-metab_quantified.yaml @@ -1,3 +1,4 @@ +# this example is invalid because the has_metabolite_quantifications slot is used when it should be has_metabolite_identifications id: nmdc:wfmb-99-ABCDEF.1 type: nmdc:MetabolomicsAnalysis started_at_time: '2021-08-05T14:48:51+00:00' @@ -18,3 +19,4 @@ has_metabolite_quantifications: highest_similarity_score: 0.9534156546099186 metabolite_quantified: chebi:16997 type: nmdc:MetaboliteQuantification +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/valid/Database-Metabolomics-configuration.yaml b/src/data/valid/Database-Metabolomics-configuration.yaml index 6c36fd078d..bafdd0cdc0 100644 --- a/src/data/valid/Database-Metabolomics-configuration.yaml +++ b/src/data/valid/Database-Metabolomics-configuration.yaml @@ -12,6 +12,7 @@ workflow_execution_set: has_output: - nmdc:dobj-90-izwYW61 type: nmdc:MetabolomicsAnalysis + metabolomics_analysis_category: gc_ms_metabolomics data_object_set: - id: nmdc:dobj-70-izwYW6 data_category: workflow_parameter_data diff --git a/src/data/valid/Database-MetabolomicsAnalysis-1.yaml 
b/src/data/valid/Database-MetabolomicsAnalysis-1.yaml index 99787bff75..13340465f8 100644 --- a/src/data/valid/Database-MetabolomicsAnalysis-1.yaml +++ b/src/data/valid/Database-MetabolomicsAnalysis-1.yaml @@ -13,3 +13,4 @@ workflow_execution_set: - nmdc:dobj-99-xxxxxx3 - nmdc:dobj-99-xxxxxx4 type: nmdc:MetabolomicsAnalysis + metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/valid/Database-interleaved.yaml b/src/data/valid/Database-interleaved.yaml index f776ba86cc..83ed635885 100644 --- a/src/data/valid/Database-interleaved.yaml +++ b/src/data/valid/Database-interleaved.yaml @@ -3554,6 +3554,7 @@ workflow_execution_set: git_url: https://example.org/WorkflowExecutionActivity was_informed_by: nmdc:dgms-11-3u74ds started_at_time: '2021-08-05T14:48:51+00:00' + metabolomics_analysis_category: gc_ms_metabolomics - id: nmdc:wfmb-4d8-3z74d.1 type: nmdc:MetabolomicsAnalysis name: soil metabolomics analysis @@ -3570,6 +3571,7 @@ workflow_execution_set: - type: nmdc:MetaboliteIdentification highest_similarity_score: 0.88 metabolite_identified: CHEBI:16236 + metabolomics_analysis_category: gc_ms_metabolomics - id: nmdc:wfmgan-99-4d83z.1 type: nmdc:MetagenomeAnnotation name: human gut metagenome annotation diff --git a/src/data/valid/Database-lipid-workflow.yaml b/src/data/valid/Database-lipid-workflow.yaml index 930ec2d32a..7d71d052c1 100644 --- a/src/data/valid/Database-lipid-workflow.yaml +++ b/src/data/valid/Database-lipid-workflow.yaml @@ -16,6 +16,7 @@ workflow_execution_set: was_informed_by: "nmdc:dgms-11-vfzh1754" ended_at_time: "2024-12-19 03:09:58" version: "1.0.0" + metabolomics_analysis_category: lc_ms_lipidomics data_object_set: - id: "nmdc:dobj-11-r3xkmv70" diff --git a/src/data/valid/Database-metabolomics_calibration_example.yaml b/src/data/valid/Database-metabolomics_calibration_example.yaml index 1d02b1c72d..7a1d83faca 100644 --- a/src/data/valid/Database-metabolomics_calibration_example.yaml +++ 
b/src/data/valid/Database-metabolomics_calibration_example.yaml @@ -51,3 +51,4 @@ workflow_execution_set: git_url: https://github.com/microbiomedata/metaMS started_at_time: '2021-01-07T23:54:40Z' was_informed_by: nmdc:dgms-13-122e4240 + metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/valid/MetabolomicsAnalysis-1.yaml b/src/data/valid/MetabolomicsAnalysis-1.yaml index 744921df30..2e3960de8d 100644 --- a/src/data/valid/MetabolomicsAnalysis-1.yaml +++ b/src/data/valid/MetabolomicsAnalysis-1.yaml @@ -18,3 +18,4 @@ has_metabolite_identifications: highest_similarity_score: 0.9534156546099186 metabolite_identified: CHEBI:16997 type: nmdc:MetaboliteIdentification +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/data/valid/MetabolomicsAnalysis-2.yaml b/src/data/valid/MetabolomicsAnalysis-2.yaml index 678be273c3..aa9d439b47 100644 --- a/src/data/valid/MetabolomicsAnalysis-2.yaml +++ b/src/data/valid/MetabolomicsAnalysis-2.yaml @@ -12,3 +12,4 @@ has_output: - nmdc:dobj-11-ndgg7b38 type: nmdc:MetabolomicsAnalysis was_informed_by: nmdc:dgms-11-dj832d +metabolomics_analysis_category: gc_ms_metabolomics diff --git a/src/schema/workflow_execution_activity.yaml b/src/schema/workflow_execution_activity.yaml index d8ca749c08..cad8e633e2 100644 --- a/src/schema/workflow_execution_activity.yaml +++ b/src/schema/workflow_execution_activity.yaml @@ -248,6 +248,7 @@ classes: slots: - has_metabolite_identifications - uses_calibration + - metabolomics_analysis_category slot_usage: id: required: true @@ -299,6 +300,12 @@ slots: The category of metaproteomics analysis being performed. required: true + metabolomics_analysis_category: + range: MetabolomicsAnalysisCategoryEnum + description: >- + The category of metabolomics analysis being performed. 
+ required: true + metagenome_assembly_parameter: abstract: true @@ -547,3 +554,16 @@ enums: in_silico_metagenome: description: >- A metaproteomics analysis that is matched to an in silico generated metagenome. + + MetabolomicsAnalysisCategoryEnum: + description: The category of metabolomics analysis being performed. + permissible_values: + gc_ms_metabolomics: + description: >- + A metabolomics analysis that is performed on gas chromatography mass spectrometry data. + lc_ms_lipidomics: + description: >- + A metabolomics analysis that is performed on liquid chromatography mass spectrometry data for lipidomics annotation. + lc_ms_metabolomics: + description: >- + A metabolomics analysis that is performed on liquid chromatography mass spectrometry data.