Skip to content

Commit

Permalink
Implement CIF Ensemble preprocessing print
Browse files: browse the repository at this point in the history
  • Loading branch information
bobleesj committed Jun 19, 2024
1 parent 35c4933 commit 479e193
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 53 deletions.
40 changes: 24 additions & 16 deletions example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,38 @@
"metadata": {},
"outputs": [
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"2024-06-18 12:45:47,573 - INFO - Preprocessing tests/data/cif/folder/300169.cif\n",
"2024-06-18 12:45:47,583 - INFO - Parsing .cif file and generating a supercell\n",
"2024-06-18 12:45:47,594 - INFO - Computing atomic environments in the supercell\n",
"2024-06-18 12:45:47,756 - INFO - Preprocessing tests/data/cif/folder/300171.cif\n",
"2024-06-18 12:45:47,758 - INFO - Parsing .cif file and generating a supercell\n",
"2024-06-18 12:45:47,763 - INFO - Computing atomic environments in the supercell\n",
"2024-06-18 12:45:47,877 - INFO - Preprocessing tests/data/cif/folder/300170.cif\n",
"2024-06-18 12:45:47,878 - INFO - Parsing .cif file and generating a supercell\n",
"2024-06-18 12:45:47,882 - INFO - Computing atomic environments in the supercell\n"
"\n",
"CIF Preprocessing has begun...\n",
"\n",
"Preprocessing tests/data/cif/ensemble_test/300169.cif (1/6)\n",
"Preprocessing tests/data/cif/ensemble_test/260171.cif (2/6)\n",
"Preprocessing tests/data/cif/ensemble_test/250697.cif (3/6)\n",
"Preprocessing tests/data/cif/ensemble_test/250709.cif (4/6)\n",
"Preprocessing tests/data/cif/ensemble_test/300171.cif (5/6)\n",
"Preprocessing tests/data/cif/ensemble_test/300170.cif (6/6)\n",
"\n",
"File movement summary:\n",
"The returned loop tags do not match the expected tags.: 0 files moved\n",
"Wrong number of values in loop _atom_site_*: 0 files moved\n",
"The file contains duplicate atom site labels.: 0 files moved\n",
"The element was not correctly parsed from the site label.: 0 files moved\n",
"An error occurred while processing symmetry operation: 0 files moved\n",
"Missing atomic coordinates: 0 files moved\n",
"other_error: 0 files moved\n"
]
}
],
"source": [
"from cifkit import Cif\n",
"from cifkit import Cif, CifEnsemble\n",
"from cifkit.utils import folder\n",
"\n",
"# Example usage\n",
"file_paths = folder.get_file_path_list(\"tests/data/cif/folder\")\n",
"\n",
"for file_path in file_paths:\n",
" cif = Cif(file_path, logging_enabled=True)\n",
"cif_ensemble = CifEnsemble(\"tests/data/cif/ensemble_test\")\n",
"\n",
"\n",
" # Print output\n",
" # print(\"Formula:\", cif.formula)\n",
Expand All @@ -39,8 +47,8 @@
" # print(\"Space group:\", cif.space_group_name)\n",
"\n",
" # Set the cut-off radius\n",
" cut_off_radius = 4\n",
" cif.compute_connections(cut_off_radius)\n",
" # cut_off_radius = 4\n",
" # cif.compute_connections(cut_off_radius)\n",
" # print(cif.bond_fraction_CN)\n",
" # print(cif.bond_counts_CN)\n",
" # print(cif.all_bond_pairs)\n",
Expand Down
66 changes: 36 additions & 30 deletions src/cifkit/preprocessors/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,43 +21,49 @@ def move_files_based_on_errors(dir_path):

# Dictionary to hold directory paths for each error type
error_directories = {
CifParserError.SYMMETRY_OPERATION_ERROR: dir_path / "bad_op",
CifParserError.WRONG_LOOP_VALUE_COUNT: dir_path / "wrong_loop_value",
CifParserError.MISSING_COORDINATES: dir_path / "bad_coords",
CifParserError.INVALID_PARSED_ELEMENT: dir_path / "invalid_label",
CifParserError.DUPLICATE_LABELS: dir_path / "duplicate_labels",
"other_error": dir_path / "other_error",
"error_operations": dir_path / "error_operations",
"error_duplicate_labels": dir_path / "error_duplicate_labels",
"error_wrong_loop_value": dir_path / "error_wrong_loop_value",
"error_coords": dir_path / "error_coords",
"error_invalid_label": dir_path / "error_invalid_label",
"error_others": dir_path / "error_others",
}

# Initialize the count of moved files for each error type
num_files = {key.value: 0 for key in CifParserError}
num_files["other_error"] = 0

file_paths = list(dir_path.glob("*.cif"))
num_files_moved = {key: 0 for key in error_directories.keys()}
file_paths = folder.get_file_paths(str(dir_path))

for file_path in file_paths:
for i, file_path in enumerate(file_paths, start=1):
filename = os.path.basename(file_path)
moved = False

print(f"Preprocessing {file_path} ({i}/{len(file_paths)})")
try:
Cif(file_path)
except Exception as e:
error_message = str(e)
for error, message in CifParserError.__members__.items():
if message.value in error_message:

make_directory_and_move(
file_path, error_directories[message], filename
)
num_files[message.value] += 1
moved = True
break

if not moved:
make_directory_and_move(
file_path, error_directories["other_error"], filename
)
num_files["other_error"] += 1

print(f"File {filename} moved due to error: {error_message}")
print(num_files)
# Classify the error by matching known substrings in its message
if "symmetry operation" in error_message:
error_type = "error_operations"
elif "contains duplicate atom site labels" in error_message:
error_type = "error_duplicate_labels"
elif "Wrong number of values in loop" in error_message:
error_type = "error_wrong_loop_value"
elif "missing atomic coordinates" in error_message:
error_type = "error_coords"
elif "incorrectly parsed element" in error_message:
error_type = "error_invalid_label"
else:
error_type = "error_others"

make_directory_and_move(
file_path, error_directories[error_type], filename
)
num_files_moved[error_type] += 1
print(
f"File {filename} moved to '{error_type}' due to: {error_message}"
)

# Display the number of files moved to each folder
print("\nSUMMARY")
for error_type, count in num_files_moved.items():
print(f"# of files moved to '{error_type}' folder: {count}")
14 changes: 7 additions & 7 deletions tests/core/preprocessors/test_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ def test_move_files_based_on_errors(tmp_path):

# Define expected directories
expected_dirs = {
"duplicate_labels": tmp_dir / "duplicate_labels",
"wrong_loop_value": tmp_dir / "wrong_loop_value",
"invalid_label": tmp_dir / "invalid_label",
"other_error": tmp_dir / "other_error",
"error_duplicate_labels": tmp_dir / "error_duplicate_labels",
"error_wrong_loop_value": tmp_dir / "error_wrong_loop_value",
"error_invalid_label": tmp_dir / "error_invalid_label",
"error_others": tmp_dir / "error_others",
}

# Run the function with the paths in the temporary directory
move_files_based_on_errors(str(tmp_dir))

# Assert the number of files in each directory
assert get_file_count(expected_dirs["duplicate_labels"]) == 1
assert get_file_count(expected_dirs["wrong_loop_value"]) == 1
# assert get_file_count(expected_dirs["other_error"]) == 1
assert get_file_count(expected_dirs["error_wrong_loop_value"]) == 1
assert get_file_count(expected_dirs["error_duplicate_labels"]) == 1
assert get_file_count(expected_dirs["error_duplicate_labels"]) == 1

0 comments on commit 479e193

Please sign in to comment.