Skip to content

Commit

Permalink
Check unique atom without reading content line
Browse files: browse the repository at this point in the history
  • Loading branch information
bobleesj committed Sep 3, 2024
1 parent 1b1447d commit ac69786
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 27 deletions.
29 changes: 16 additions & 13 deletions src/cifkit/utils/cif_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,23 +342,26 @@ def parse_atom_site_occupancy_info(file_path: str) -> dict:

def check_unique_atom_site_labels(file_path: str) -> None:
    """Validate the atom site labels of the CIF file at ``file_path``.

    The labels and element symbols are read from the parsed loop values
    instead of from the raw content lines of the file.

    Parameters
    ----------
    file_path : str
        Path of the CIF file to check.

    Raises
    ------
    ValueError
        With ``CifParserError.INVALID_PARSED_ELEMENT`` when the element
        derived from a site label differs from the element listed for that
        site, or with ``CifParserError.DUPLICATE_LABELS`` when a site label
        occurs more than once.
    """
    block = get_cif_block(file_path)
    loop_values = get_loop_values(block)

    # NOTE(review): presumably column 0 holds `_atom_site_label` and column 1
    # holds `_atom_site_type_symbol`, one entry per atom site -- confirm
    # against get_loop_values.
    site_labels = loop_values[0]
    type_symbols = loop_values[1]

    # The element check runs first, so a file with both problems reports the
    # unparsable element rather than the duplicate label.
    for site_label, type_symbol in zip(site_labels, type_symbols):
        if get_atom_type_from_label(site_label) != type_symbol:
            raise ValueError(CifParserError.INVALID_PARSED_ELEMENT.value)

    # A set drops repeated labels, so a smaller size means duplicates exist.
    if len(set(site_labels)) != len(site_labels):
        raise ValueError(CifParserError.DUPLICATE_LABELS.value)
27 changes: 17 additions & 10 deletions tests/core/util/test_cif_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,6 @@ def test_get_loop_value_ICSD(file_path_ICSD_formatted):
assert loop_values[7][1] == "1."


def test_get_loop_values_wrong_loop_number():
    """Reading a CIF file with a malformed loop raises the count error."""
    bad_loop_cif_path = "tests/data/cif/URhIn_bad_loop_format.cif"

    with pytest.raises(ValueError) as exc_info:
        get_cif_block(bad_loop_cif_path)

    # The assertion sits outside the ``with`` body so it runs after the raise.
    raised_message = str(exc_info.value)
    assert CifParserError.WRONG_LOOP_VALUE_COUNT.value in raised_message


def test_get_num_of_atom_unique_labels(loop_values_URhIn):
    """The URhIn loop values fixture yields four unique atom labels."""
    unique_label_count = get_unique_label_count(loop_values_URhIn)
    assert unique_label_count == 4

Expand Down Expand Up @@ -282,9 +275,23 @@ def test_get_parsed_atom_site_occupancy_info_with_braket():
}


# NOTE(review): `now` looks like a temporary test-selection marker -- confirm
# it is registered in the pytest configuration, or drop it.
@pytest.mark.now
def test_check_unique_atom_site_labels(file_path_URhIn):
    """Check the label validation on one valid and two malformed CIF files."""
    # A well-formed file must pass without raising.
    check_unique_atom_site_labels(file_path_URhIn)

    # The same site label listed twice.
    duplicate_labels_file_path = (
        "tests/data/cif/bad_cif_format/duplicate_labels.cif"
    )
    with pytest.raises(ValueError) as e:
        check_unique_atom_site_labels(duplicate_labels_file_path)
    assert str(e.value) == "The file contains duplicate atom site labels."

    # A site label whose element differs from the listed type symbol.
    unparsable_file_path = (
        "tests/data/cif/bad_cif_format/label_element_different.cif"
    )
    with pytest.raises(ValueError) as e:
        check_unique_atom_site_labels(unparsable_file_path)
    assert (
        str(e.value)
        == "The element was not correctly parsed from the site label."
    )
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ loop_
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
In1 In 3 g 1
U1 U 3 f 1
Rh1 Rh 2 d 1
Rh2 Rh 1 a 1
In1 In 3 g 0.2505 0 0.5 1
In1 In 3 f 0.5925 0 0 1
Rh1 Rh 2 d 0.333333 0.666667 0.5 1
Rh2 Rh 1 a 0 0 0 1


_exptl_crystal_colour ?
Expand Down
127 changes: 127 additions & 0 deletions tests/data/cif/bad_cif_format/label_element_different.cif
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
##############################################################################
# #
# In-Rh-U # URhIn # 380981 #
# #
##############################################################################
# #
# Pearson's Crystal Data #
# Crystal Structure Database for Inorganic Compounds (on DVD) #
# Release 2022/23 #
# Editors: Pierre Villars and Karin Cenzual #
# #
# Copyright (c) ASM International & Material Phases Data System (MPDS), #
# Switzerland & National Institute for Materials Science (NIMS), Japan, 2022 #
# All rights reserved. Version 2022.07 #
# #
# This copy of Pearson's Crystal Data is licensed to: #
# University of Alberta, Chemistry Department, 1-5 Installations License #
# #
##############################################################################

data_380981
_audit_creation_date 2023-07-02
_audit_creation_method
;
Pearson's Crystal Data browser
;
#_database_code_PCD 380981
_database_code_PDF 04-002-7389

# Entry summary

_chemical_formula_structural 'U Rh In'
_chemical_formula_sum 'In Rh U'
_chemical_name_mineral ?
_chemical_compound_source ?
_chemical_name_structure_type ZrNiAl,hP9,189
_chemical_formula_weight 455.8

# Bibliographic data

_publ_section_title
'Magnetic structures of some UTM compounds'
_journal_coden_ASTM JMMMDC
_journal_name_full 'J. Magn. Magn. Mater.'
_journal_year 1995
_journal_volume 140/144
_journal_page_first 1377
_journal_page_last 1378
_journal_language English
loop_
_publ_author_name
_publ_author_address
''
;
;

# Standardized crystallographic data

_cell_length_a 7.476
_cell_length_b 7.476
_cell_length_c 3.881
_cell_angle_alpha 90
_cell_angle_beta 90
_cell_angle_gamma 120
_cell_volume 187.9
_cell_formula_units_Z 3
_space_group_IT_number 189
_space_group_name_H-M_alt 'P -6 2 m'
loop_
_space_group_symop_id
_space_group_symop_operation_xyz
1 'x, y, z'
2 '-x+y, -x, -z'
3 '-x+y, -x, z'
4 '-x, -x+y, -z'
5 '-x, -x+y, z'
6 '-y, x-y, -z'
7 '-y, x-y, z'
8 'x, y, -z'
9 'x-y, -y, -z'
10 'x-y, -y, z'
11 'y, x, -z'
12 'y, x, z'

# Atomic positions taken from type-defining entry

loop_
_atom_type_symbol
In
U
Rh
loop_
_atom_site_label
_atom_site_type_symbol
_atom_site_symmetry_multiplicity
_atom_site_Wyckoff_symbol
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
In1 In 3 g 0.2505 0 0.5 1
In2 Rh 3 f 0.5925 0 0 1
Rh1 Rh 2 d 0.333333 0.666667 0.5 1
Rh2 Rh 1 a 0 0 0 1


_exptl_crystal_colour ?
_exptl_crystal_density_meas ?
_exptl_crystal_density_diffrn 12.09
_cell_measurement_temperature ?
_cell_measurement_radiation neutrons
_cell_measurement_reflns_used ?
_diffrn_ambient_temperature ?
_diffrn_measurement_device 'automatic diffractometer'
_diffrn_measurement_device_type ?
_diffrn_radiation_type ?
_diffrn_reflns_number ?
_exptl_absorpt_coefficient_mu ?
_exptl_absorpt_correction_type ?
_computing_structure_solution ?
_refine_ls_number_parameters ?
_refine_ls_number_reflns ?
_refine_ls_R_factor_gt ?
_refine_ls_wR_factor_gt ?

# End of data set 380981

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit ac69786

Please sign in to comment.