Skip to content

Commit

Permalink
Merge pull request #2332 from microbiomedata/2330_metabolomics_category
Browse files Browse the repository at this point in the history
Add slot and enumeration to `MetabolomicsAnalysis` and implement migrator
  • Loading branch information
kheal authored Jan 29, 2025
2 parents ba2a315 + 4f36c47 commit 1b42cef
Show file tree
Hide file tree
Showing 20 changed files with 91 additions and 1 deletion.
35 changes: 35 additions & 0 deletions nmdc_schema/migrators/migrator_from_11_3_0_to_11_4_0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from nmdc_schema.migrators.migrator_base import MigratorBase


class Migrator(MigratorBase):
    r"""Migrator for databases moving from schema version 11.3.0 to 11.4.0."""

    _from_version = "11.3.0"
    _to_version = "11.4.0"

    def upgrade(self):
        r"""
        Hands `set_metab_analysis_category` to the adapter so that it is run against
        the documents of the `workflow_execution_set` collection.
        """

        collection_name = "workflow_execution_set"
        pipeline = [self.set_metab_analysis_category]
        self.adapter.process_each_document(collection_name, pipeline)

    def set_metab_analysis_category(self, workflow: dict) -> dict:
        r"""
        Populates `metabolomics_analysis_category` on a workflow execution record.

        Only records whose `type` is "nmdc:MetabolomicsAnalysis" are touched; every other
        record is handed back as-is. For a metabolomics record, the mere presence of the
        `has_metabolite_identifications` key (even with an empty value) selects
        "gc_ms_metabolomics"; when the key is absent the record is treated as a lipid
        analysis and gets "lc_ms_lipidomics".

        The dictionary passed in is modified in place and is also the return value.

        >>> m = Migrator()
        >>> m.set_metab_analysis_category({'id': 123, 'type': 'nmdc:MetabolomicsAnalysis', 'has_metabolite_identifications': []})
        {'id': 123, 'type': 'nmdc:MetabolomicsAnalysis', 'has_metabolite_identifications': [], 'metabolomics_analysis_category': 'gc_ms_metabolomics'}
        >>> m.set_metab_analysis_category({'id': 123, 'type': 'nmdc:MetabolomicsAnalysis'})  # does not have has_metabolite_identifications field, therefore it's a lipid analysis
        {'id': 123, 'type': 'nmdc:MetabolomicsAnalysis', 'metabolomics_analysis_category': 'lc_ms_lipidomics'}
        >>> m.set_metab_analysis_category({'id': 123, 'type': 'nmdc:Metaproteomics'})  # not a metabolomics analysis
        {'id': 123, 'type': 'nmdc:Metaproteomics'}
        """

        # Guard clause: anything that is not a metabolomics analysis passes through untouched.
        if workflow["type"] != "nmdc:MetabolomicsAnalysis":
            return workflow

        # Key presence, not the value, decides the category.
        has_identifications = "has_metabolite_identifications" in workflow
        workflow["metabolomics_analysis_category"] = (
            "gc_ms_metabolomics" if has_identifications else "lc_ms_lipidomics"
        )
        return workflow
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# this example is invalid because the records in the workflow execution set do not have a metabolomics_analysis_category value
workflow_execution_set:
- id: nmdc:wfmb-99-ABCDEF.1
name: Metabolomics Analysis Activity for nmdc:wfmb-99-ABCDEF.1
started_at_time: '2021-08-05T14:48:51+00:00'
ended_at_time: '2021-09-15T10:13:20+00:00'
execution_resource: NERSC-Cori
was_informed_by: nmdc:dgms-11-djad84
git_url: https://example.org/WorkflowExecutionActivity
has_input:
- nmdc:dobj-99-xxxxxx1
- nmdc:dobj-99-xxxxxx2
has_output:
- nmdc:dobj-99-xxxxxx3
- nmdc:dobj-99-xxxxxx4
type: nmdc:MetabolomicsAnalysis
1 change: 1 addition & 0 deletions src/data/invalid/Database-invalid_calibration_slot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ workflow_execution_set:
git_url: https://github.com/microbiomedata/metaMS
started_at_time: '2021-01-07T23:54:40Z'
was_informed_by: nmdc:dgms-13-122e4240
metabolomics_analysis_category: gc_ms_metabolomics
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ has_input:
has_output:
- nmdc:dobj-11-547rwa36
- nmdc:dobj-11-547rwa37
metabolomics_analysis_category: gc_ms_metabolomics
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ has_input:
has_output:
- nmdc:dobj-11-123d
- nmdc:dobj-11-34dj4
was_informed_by: nmdc:omprc-00-123456
was_informed_by: nmdc:omprc-00-123456
metabolomics_analysis_category: gc_ms_metabolomics
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ has_output:
- nmdc:dobj-11-123d
- nmdc:dobj-11-34dj4
was_informed_by: nmdc:omprc-11-284u7d
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/invalid/MetabolomicsAnalysis-invalid_id-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ has_output:
- nmdc:dobj-11-ndgg7b37
- nmdc:dobj-11-ndgg7b38
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/invalid/MetabolomicsAnalysis-invalid_id-2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ has_output:
- nmdc:dobj-11-ndgg7b37
- nmdc:dobj-11-ndgg7b38
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/invalid/MetabolomicsAnalysis-invalid_id-3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ has_output:
- nmdc:dobj-11-ndgg7b37
- nmdc:dobj-11-ndgg7b38
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/invalid/MetabolomicsAnalysis-invalid_id-4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ has_output:
- nmdc:dobj-11-ndgg7b37
- nmdc:dobj-11-ndgg7b38
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ has_output:
- nmdc:dobj-11-ndgg7b37
- nmdc:dobj-11-ndgg7b38
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
2 changes: 2 additions & 0 deletions src/data/invalid/MetabolomicsAnalysis-metab_quantified.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# this example is invalid because the has_metabolite_quantifications slot is used when it should be has_metabolite_identifications
id: nmdc:wfmb-99-ABCDEF.1
type: nmdc:MetabolomicsAnalysis
started_at_time: '2021-08-05T14:48:51+00:00'
Expand All @@ -18,3 +19,4 @@ has_metabolite_quantifications:
highest_similarity_score: 0.9534156546099186
metabolite_quantified: chebi:16997
type: nmdc:MetaboliteQuantification
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/valid/Database-Metabolomics-configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ workflow_execution_set:
has_output:
- nmdc:dobj-90-izwYW61
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
data_object_set:
- id: nmdc:dobj-70-izwYW6
data_category: workflow_parameter_data
Expand Down
1 change: 1 addition & 0 deletions src/data/valid/Database-MetabolomicsAnalysis-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ workflow_execution_set:
- nmdc:dobj-99-xxxxxx3
- nmdc:dobj-99-xxxxxx4
type: nmdc:MetabolomicsAnalysis
metabolomics_analysis_category: gc_ms_metabolomics
2 changes: 2 additions & 0 deletions src/data/valid/Database-interleaved.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3554,6 +3554,7 @@ workflow_execution_set:
git_url: https://example.org/WorkflowExecutionActivity
was_informed_by: nmdc:dgms-11-3u74ds
started_at_time: '2021-08-05T14:48:51+00:00'
metabolomics_analysis_category: gc_ms_metabolomics
- id: nmdc:wfmb-4d8-3z74d.1
type: nmdc:MetabolomicsAnalysis
name: soil metabolomics analysis
Expand All @@ -3570,6 +3571,7 @@ workflow_execution_set:
- type: nmdc:MetaboliteIdentification
highest_similarity_score: 0.88
metabolite_identified: CHEBI:16236
metabolomics_analysis_category: gc_ms_metabolomics
- id: nmdc:wfmgan-99-4d83z.1
type: nmdc:MetagenomeAnnotation
name: human gut metagenome annotation
Expand Down
1 change: 1 addition & 0 deletions src/data/valid/Database-lipid-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ workflow_execution_set:
was_informed_by: "nmdc:dgms-11-vfzh1754"
ended_at_time: "2024-12-19 03:09:58"
version: "1.0.0"
metabolomics_analysis_category: lc_ms_lipidomics

data_object_set:
- id: "nmdc:dobj-11-r3xkmv70"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ workflow_execution_set:
git_url: https://github.com/microbiomedata/metaMS
started_at_time: '2021-01-07T23:54:40Z'
was_informed_by: nmdc:dgms-13-122e4240
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/valid/MetabolomicsAnalysis-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ has_metabolite_identifications:
highest_similarity_score: 0.9534156546099186
metabolite_identified: CHEBI:16997
type: nmdc:MetaboliteIdentification
metabolomics_analysis_category: gc_ms_metabolomics
1 change: 1 addition & 0 deletions src/data/valid/MetabolomicsAnalysis-2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ has_output:
- nmdc:dobj-11-ndgg7b38
type: nmdc:MetabolomicsAnalysis
was_informed_by: nmdc:dgms-11-dj832d
metabolomics_analysis_category: gc_ms_metabolomics
20 changes: 20 additions & 0 deletions src/schema/workflow_execution_activity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ classes:
slots:
- has_metabolite_identifications
- uses_calibration
- metabolomics_analysis_category
slot_usage:
id:
required: true
Expand Down Expand Up @@ -299,6 +300,12 @@ slots:
The category of metaproteomics analysis being performed.
required: true

metabolomics_analysis_category:
range: MetabolomicsAnalysisCategoryEnum
description: >-
The category of metabolomics analysis being performed.
required: true

metagenome_assembly_parameter:
abstract: true

Expand Down Expand Up @@ -547,3 +554,16 @@ enums:
in_silico_metagenome:
description: >-
A metaproteomics analysis that is matched to an in silico generated metagenome.
MetabolomicsAnalysisCategoryEnum:
description: The category of metabolomics analysis being performed.
permissible_values:
gc_ms_metabolomics:
description: >-
A metabolomics analysis that is performed on gas chromatography mass spectrometry data.
lc_ms_lipidomics:
description: >-
A metabolomics analysis that is performed on liquid chromatography mass spectrometry data for lipidomics annotation.
lc_ms_metabolomics:
description: >-
A metabolomics analysis that is performed on liquid chromatography mass spectrometry data.

0 comments on commit 1b42cef

Please sign in to comment.