"""Pydantic models for a data dictionary and its Neurobagel annotations.

Attributes use Python-style (lower camel case) names. The capitalized keys
used in the dictionary itself (``TermURL``, ``IsAbout``, ``Levels``, ...)
are mapped onto them through ``Field(alias=...)``, so input data is expected
to use the capitalized spelling.
"""
# Provenance: final state of bagelbids/dictionary_models.py after the
# five-patch series by Sebastian Urchs (Jan 2023): initial schema, field
# descriptions, class docstrings, pythonic attribute names with aliases,
# and a description spacing fix.
from typing import Dict, Optional, Union

from pydantic import BaseModel, Field, conlist


class Identifier(BaseModel):
    """An identifier of a controlled term with an IRI"""

    termURL: str = Field(
        ...,
        description="An unambiguous identifier for the term, concept or entity that is referenced",
        alias="TermURL",
    )
    label: str = Field(
        ...,
        description="A human readable label. If more than one label exists for the term, "
        "then the preferred label should be used.",
        alias="Label",
    )


class Neurobagel(BaseModel):
    """The base model for a Neurobagel column annotation"""

    isAbout: Identifier = Field(
        ...,
        description="The concept or controlled term that describes this column",
        alias="IsAbout",
    )
    # unique_items=True rejects lists that contain the same value twice.
    missingValues: conlist(str, unique_items=True) = Field(
        ...,
        description="A list of unique values that represent "
        "invalid responses, typos, or missing data",
        alias="MissingValues",
    )
    # Defaults to None: the annotation is Optional and the description only
    # asks for the field when the column belongs to an assessment tool, so
    # the key must be allowed to be absent rather than merely nullable.
    isPartOf: Optional[Identifier] = Field(
        None,
        description="If the column is a subscale or item of an assessment tool "
        "then the assessment tool should be specified here.",
        alias="IsPartOf",
    )


class CategoricalNeurobagel(Neurobagel):
    """A Neurobagel annotation for a categorical column"""

    levels: Dict[str, Identifier] = Field(
        ...,
        description="For categorical variables: "
        "An object of values (keys) in the column and the semantic "
        "term (URI and label) they are unambiguously mapped to.",
        alias="Levels",
    )


class ContinuousNeurobagel(Neurobagel):
    """A Neurobagel annotation for a continuous column"""

    transformation: Identifier = Field(
        ...,
        description="For continuous columns this field can be used to describe "
        "a transformation that can be applied to the values in this "
        "column in order to match the desired format of a standardized "
        "data element referenced in the IsAbout attribute.",
        alias="Transformation",
    )


class Column(BaseModel):
    """The base model for a BIDS column description"""

    description: str = Field(
        ...,
        description="Free-form natural language description",
        alias="Description",
    )
    # NOTE(review): Union members are listed categorical-first; if the
    # installed pydantic resolves Unions left to right, an annotation that
    # carries both Levels and Transformation would be parsed as categorical.
    annotations: Optional[Union[CategoricalNeurobagel, ContinuousNeurobagel]] = Field(
        None,
        description="Semantic annotations",
        alias="Annotations",
    )


class CategoricalColumn(Column):
    """A BIDS column annotation for a categorical column"""

    levels: Dict[str, str] = Field(
        ...,
        description="For categorical variables: "
        "An object of possible values (keys) "
        "and their descriptions (values). ",
        alias="Levels",
    )


class ContinuousColumn(Column):
    """A BIDS column annotation for a continuous column"""

    units: str = Field(
        ...,
        description="Measurement units for the values in this column. "
        "SI units in CMIXF formatting are RECOMMENDED (see Units)",
        alias="Units",
    )


class DataDictionary(BaseModel):
    """A data dictionary with human and machine readable information for a tabular data file"""

    # Custom root type: the model is the mapping itself, with string keys
    # (presumably the column names of the tabular file -- confirm against
    # callers) and one column model per key.
    __root__: Dict[str, Union[ContinuousColumn, CategoricalColumn]]