From 97e2b7327236666bd025bbbbc98eb383683fce17 Mon Sep 17 00:00:00 2001 From: Amy Thompson <52806925+amyjaynethompson@users.noreply.github.com> Date: Tue, 25 Jun 2024 09:45:15 +0100 Subject: [PATCH] Data id fix (#2681) fix data ids output by json file --- newsfragments/2681.bugfix | 1 + src/dials/algorithms/correlation/analysis.py | 3 +- tests/algorithms/correlation/test_analysis.py | 47 ++++++++++++++++++- 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 newsfragments/2681.bugfix diff --git a/newsfragments/2681.bugfix b/newsfragments/2681.bugfix new file mode 100644 index 0000000000..4b113a304a --- /dev/null +++ b/newsfragments/2681.bugfix @@ -0,0 +1 @@ +``dials.correlation_matrix``: Correctly select datasets for output json after filtering when used by multiplex. diff --git a/src/dials/algorithms/correlation/analysis.py b/src/dials/algorithms/correlation/analysis.py index cf41589e0e..5a50e33f29 100644 --- a/src/dials/algorithms/correlation/analysis.py +++ b/src/dials/algorithms/correlation/analysis.py @@ -377,7 +377,8 @@ def convert_to_importable_json(self, linkage_matrix: np.ndarray) -> OrderedDict: linkage_mat_as_dict = linkage_matrix_to_dict(linkage_matrix) for d in linkage_mat_as_dict.values(): # Difference in indexing between linkage_mat_as_dict and datasets, so have i-1 - d["datasets"] = [self.ids_to_identifiers_map[i - 1] for i in d["datasets"]] + real_num = [self.labels[i - 1] for i in d["datasets"]] + d["datasets"] = [self.ids_to_identifiers_map[i] for i in real_num] return linkage_mat_as_dict diff --git a/tests/algorithms/correlation/test_analysis.py b/tests/algorithms/correlation/test_analysis.py index 3f0f2ec9c4..a79944a30b 100644 --- a/tests/algorithms/correlation/test_analysis.py +++ b/tests/algorithms/correlation/test_analysis.py @@ -1,7 +1,10 @@ from __future__ import annotations +import json import pathlib +import pytest + from dials.algorithms.correlation.analysis import CorrelationMatrix from dials.command_line.correlation_matrix import phil_scope from dials.util.multi_dataset_handling import ( @@ -11,7 +14,8 @@ from dials.util.options import ArgumentParser, reflections_and_experiments_from_files -def test_corr_mat(dials_data, run_in_tmp_path): +@pytest.fixture() +def proteinase_k(dials_data): mcp = dials_data("vmxi_proteinase_k_sweeps", pathlib=True) params = phil_scope.extract() input_data = [] @@ -41,7 +45,48 @@ def test_corr_mat(dials_data, run_in_tmp_path): assert len(experiments) == len(reflections) assert len(experiments) > 1 experiments, reflections = assign_unique_identifiers(experiments, reflections) + yield experiments, reflections, params + + +def test_corr_mat(proteinase_k, run_in_tmp_path): + experiments, reflections, params = proteinase_k matrices = CorrelationMatrix(experiments, reflections, params) matrices.calculate_matrices() matrices.output_json() assert pathlib.Path("dials.correlation_matrix.json").is_file() + + +def test_filtered_corr_mat(proteinase_k, run_in_tmp_path): + experiments, reflections, params = proteinase_k + ids_to_identifiers_map = {} + for table in reflections: + ids_to_identifiers_map.update(table.experiment_identifiers()) + + # Simulate filtered dataset by multiplex + id_to_remove = [ids_to_identifiers_map[2]] + ids_to_identifiers_map.pop(2) + reflections.pop(2) + experiments.remove_on_experiment_identifiers(id_to_remove) + + matrices = CorrelationMatrix( + experiments, reflections, params, ids_to_identifiers_map + ) + matrices.calculate_matrices() + matrices.output_json() + assert pathlib.Path("dials.correlation_matrix.json").is_file() + + expected_ids = [[1, 3], [0, 1, 3]] + + # Check main algorithm correct with filtering + for i, j in zip(matrices.correlation_clusters, expected_ids): + assert i.labels == j + + # Check json output also correct + with open(pathlib.Path("dials.correlation_matrix.json")) as f: + data = json.load(f) + + assert len(data["correlation_matrix_clustering"]) == len(expected_ids) + for i, j in zip(data["correlation_matrix_clustering"], expected_ids): + assert len(data["correlation_matrix_clustering"][i]["datasets"]) == len(j) + for a, e in zip(data["correlation_matrix_clustering"][i]["datasets"], j): + assert a == ids_to_identifiers_map[e]