Skip to content

Commit

Permalink
Add func for stripping numbers and symbols in element
Browse files Browse the repository at this point in the history
  • Loading branch information
bobleesj committed Sep 3, 2024
1 parent 8780164 commit 1b1447d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/cifkit/utils/string_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,11 @@ def clean_parsed_structure(structure_type: str) -> str:
Split the parsed structure text and remove "~".
"""
return structure_type.split(",")[0].replace("~", "")


def strip_numbers_and_symbols(value: str) -> str:
    """
    Return ``value`` with every digit, '+' and '-' character deleted.

    Per the original note, some ICSD/COD entries carry a charge on the
    atomic site element (e.g. "Fe0+"); this reduces such a label to the
    bare letters ("Fe"). All other characters are left untouched, and
    digits anywhere in the string are removed, not only trailing ones
    (e.g. "Fe2O3" becomes "FeO").
    """
    # One character class covering digits and the two sign characters.
    digit_or_sign = re.compile(r"[\d\+\-]")
    return digit_or_sign.sub("", value)
23 changes: 23 additions & 0 deletions tests/core/util/test_string_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
get_string_to_formatted_float,
trim_string,
clean_parsed_structure,
strip_numbers_and_symbols,
)


Expand Down Expand Up @@ -86,3 +87,25 @@ def test_clean_parsed_formula(value_string, expected):
)
def test_clean_parsed_structure(value_string, expected):
assert clean_parsed_structure(value_string) == expected


@pytest.mark.parametrize(
    "input_value, expected_output",
    [
        # Charged site labels: trailing digit + sign is removed.
        ("Se2-", "Se"),
        ("Cu1+", "Cu"),
        ("Sn0+", "Sn"),
        # Digits inside a formula are removed as well.
        ("Fe2O3", "FeO"),
        ("H2SO4", "HSO"),
        # Nothing to strip.
        ("NaCl", "NaCl"),
        # Digits/signs in unusual positions.
        ("Mg2+2", "Mg"),
        ("3Li+", "Li"),
        # Degenerate inputs.
        ("", ""),
        ("123456", ""),
        ("NO+-", "NO"),
    ],
)
def test_strip_numbers_and_symbols(input_value, expected_output):
    """Check that digits, '+' and '-' are removed from each sample input."""
    actual_output = strip_numbers_and_symbols(input_value)
    assert actual_output == expected_output, f"Failed for input: {input_value}"

0 comments on commit 1b1447d

Please sign in to comment.