Skip to content

Commit

Permalink
Data id fix (dials#2681)
Browse files Browse the repository at this point in the history
fix data ids output by json file
  • Loading branch information
amyjaynethompson authored Jun 25, 2024
1 parent d04235d commit 97e2b73
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
1 change: 1 addition & 0 deletions newsfragments/2681.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``dials.correlation_matrix``: Correctly select datasets for output json after filtering when used by multiplex.
3 changes: 2 additions & 1 deletion src/dials/algorithms/correlation/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ def convert_to_importable_json(self, linkage_matrix: np.ndarray) -> OrderedDict:
linkage_mat_as_dict = linkage_matrix_to_dict(linkage_matrix)
for d in linkage_mat_as_dict.values():
# Difference in indexing between linkage_mat_as_dict and datasets, so have i-1
d["datasets"] = [self.ids_to_identifiers_map[i - 1] for i in d["datasets"]]
real_num = [self.labels[i - 1] for i in d["datasets"]]
d["datasets"] = [self.ids_to_identifiers_map[i] for i in real_num]

return linkage_mat_as_dict

Expand Down
47 changes: 46 additions & 1 deletion tests/algorithms/correlation/test_analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

import json
import pathlib

import pytest

from dials.algorithms.correlation.analysis import CorrelationMatrix
from dials.command_line.correlation_matrix import phil_scope
from dials.util.multi_dataset_handling import (
Expand All @@ -11,7 +14,8 @@
from dials.util.options import ArgumentParser, reflections_and_experiments_from_files


def test_corr_mat(dials_data, run_in_tmp_path):
@pytest.fixture()
def proteinase_k(dials_data):
mcp = dials_data("vmxi_proteinase_k_sweeps", pathlib=True)
params = phil_scope.extract()
input_data = []
Expand Down Expand Up @@ -41,7 +45,48 @@ def test_corr_mat(dials_data, run_in_tmp_path):
assert len(experiments) == len(reflections)
assert len(experiments) > 1
experiments, reflections = assign_unique_identifiers(experiments, reflections)
yield experiments, reflections, params


def test_corr_mat(proteinase_k, run_in_tmp_path):
experiments, reflections, params = proteinase_k
matrices = CorrelationMatrix(experiments, reflections, params)
matrices.calculate_matrices()
matrices.output_json()
assert pathlib.Path("dials.correlation_matrix.json").is_file()


def test_filtered_corr_mat(proteinase_k, run_in_tmp_path):
experiments, reflections, params = proteinase_k
ids_to_identifiers_map = {}
for table in reflections:
ids_to_identifiers_map.update(table.experiment_identifiers())

# Simulate filtered dataset by multiplex
id_to_remove = [ids_to_identifiers_map[2]]
ids_to_identifiers_map.pop(2)
reflections.pop(2)
experiments.remove_on_experiment_identifiers(id_to_remove)

matrices = CorrelationMatrix(
experiments, reflections, params, ids_to_identifiers_map
)
matrices.calculate_matrices()
matrices.output_json()
assert pathlib.Path("dials.correlation_matrix.json").is_file()

expected_ids = [[1, 3], [0, 1, 3]]

# Check main algorithm correct with filtering
for i, j in zip(matrices.correlation_clusters, expected_ids):
assert i.labels == j

# Check json output also correct
with open(pathlib.Path("dials.correlation_matrix.json")) as f:
data = json.load(f)

assert len(data["correlation_matrix_clustering"]) == len(expected_ids)
for i, j in zip(data["correlation_matrix_clustering"], expected_ids):
assert len(data["correlation_matrix_clustering"][i]["datasets"]) == len(j)
for a, e in zip(data["correlation_matrix_clustering"][i]["datasets"], j):
assert a == ids_to_identifiers_map[e]

0 comments on commit 97e2b73

Please sign in to comment.