diff --git a/kif_lib/compiler/sparql/mapping/pubchem.py b/kif_lib/compiler/sparql/mapping/pubchem.py index a095136..f9b0ca9 100644 --- a/kif_lib/compiler/sparql/mapping/pubchem.py +++ b/kif_lib/compiler/sparql/mapping/pubchem.py @@ -93,6 +93,9 @@ def set_normalize_casrn(self, normalize_casrn: bool | None) -> None: _re_canonical_smiles: Final[re.Pattern] = re.compile( r'^[A-Za-z0-9+\-\*=#$:()\.>/\\\[\]%]+$') + + _re_iupac_name: Final[re.Pattern] = re.compile( + r'^\S(?:.*\S)?$') _re_cas_registry_number: Final[re.Pattern] = re.compile( r'^\d+-\d+-\d+$') @@ -125,6 +128,10 @@ def set_normalize_casrn(self, normalize_casrn: bool | None) -> None: #: Checks whether argument is a canonical SMILES value. CheckCanonicalSMILES: Final[M.EntryCallbackArgProcessorAlias] =\ functools.partial(M.CheckLiteral, match=_re_canonical_smiles) + + #: Checks whether argument is an IUPAC Name value (single-line text). + CheckIUPACName: Final[M.EntryCallbackArgProcessorAlias] =\ + functools.partial(M.CheckLiteral, match=_re_iupac_name) #: Checks whether argument is a CAS Registry Number. 
CheckCAS_RegistryNumber: Final[M.EntryCallbackArgProcessorAlias] =\ @@ -217,6 +224,17 @@ def wd_label_compound(self, c: C, x: V_URI, y: VLiteral): (attr, RDF.type, CHEMINF.IUPAC_Name_generated_by_LexiChem), (attr, SIO.has_value, y)) + @M.register( + [wd.IUPAC_Name(Item(x), Text(y, 'en'))], + {x: CheckCompound(), + y: CheckIUPACName(set_language='en')}) + def wd_IUPAC_Name(self, c: C, x: V_URI, y: VLiteral): + attr = c.bnode() + c.q.triples()( + (x, SIO.has_attribute, attr), + (attr, RDF.type, CHEMINF.IUPAC_Name_generated_by_LexiChem), + (attr, SIO.has_value, y)) + @M.register( [wd.canonical_SMILES(Item(x), String(y))], {x: CheckCompound(), diff --git a/kif_lib/store/mapping/pubchem.py b/kif_lib/store/mapping/pubchem.py index 8880482..e949e3f 100644 --- a/kif_lib/store/mapping/pubchem.py +++ b/kif_lib/store/mapping/pubchem.py @@ -125,6 +125,18 @@ def check_canonical_SMILES(cls, v: Value) -> str: Spec.Skip: `v` is not a canonical SMILES. """ return cls.check_string(v).content + + @classmethod + def check_IUPAC_Name(cls, v: Value) -> str: + """Checks whether `v` is an IUPAC Name. + + Returns: + The string value of `v`. + + Raises: + Spec.Skip: `v` is not an IUPAC Name. + """ + return cls.check_text(v).content @classmethod def check_chemical_formula(cls, v: Value) -> str: @@ -572,6 +584,27 @@ def wd_COMPOUND_description( (chebi_class, IAO.definition, v)) +@PubChemMapping.register( + property=wd.IUPAC_Name, + datatype=TextDatatype(), + subject_prefix=PubChemMapping.COMPOUND) +def wd_IUPAC_Name( + spec: Spec, + q: Builder, + s: TTrm, + p: TTrm, + v: TTrm +) -> None: + if isinstance(v, Value): + ### + # IMPORTANT: IUPAC values in PubChem are tagged with @en. 
+ ### + v = Text(spec.check_IUPAC_Name(cast(Value, v)), 'en') + with q.sp(s, SIO.has_attribute) as sp: + sp.pairs( + (RDF.type, CHEMINF.IUPAC_Name_generated_by_LexiChem), + (SIO.has_value, v)) + @PubChemMapping.register( property=wd.canonical_SMILES, datatype=StringDatatype(), diff --git a/kif_lib/vocabulary/wd/property.py b/kif_lib/vocabulary/wd/property.py index b1e9ac0..7e61404 100644 --- a/kif_lib/vocabulary/wd/property.py +++ b/kif_lib/vocabulary/wd/property.py @@ -14,6 +14,7 @@ auxiliary_verb = P(5401) based_on_heuristic = P(887) canonical_SMILES = P(233) +IUPAC_Name = P(2561) capital = P(36) CAS_Registry_Number = P(231) ChEBI_ID = P(683) @@ -190,6 +191,7 @@ 'author_name_string', 'auxiliary_verb', 'based_on_heuristic', + 'IUPAC_Name', 'canonical_SMILES', 'capital', 'CAS_Registry_Number',