From 54eb64f1e14ca7641d3ace81ed1306433fe67fee Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 12 Dec 2024 12:06:34 +0000 Subject: [PATCH 01/44] Add initial anonymisation config for ct --- projects/configs/tag-operations/ct.yaml | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 projects/configs/tag-operations/ct.yaml diff --git a/projects/configs/tag-operations/ct.yaml b/projects/configs/tag-operations/ct.yaml new file mode 100644 index 000000000..cdf718ed2 --- /dev/null +++ b/projects/configs/tag-operations/ct.yaml @@ -0,0 +1,26 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Conversion Type + group: 0x0008 + element: 0x0064 + op: keep +- name: Slice Thickness + group: 0x0018 + element: 0x0050 + op: keep +- name: KVP + group: 0x0018 + element: 0x0060 + op: keep From 2730317e646214020fff5fa457d2ec639ce1ea47 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 12 Dec 2024 12:06:58 +0000 Subject: [PATCH 02/44] Add initial anonymisation config for pet --- projects/configs/tag-operations/pet.yaml | 150 +++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 projects/configs/tag-operations/pet.yaml diff --git a/projects/configs/tag-operations/pet.yaml b/projects/configs/tag-operations/pet.yaml new file mode 100644 index 000000000..fe41cacd5 --- /dev/null +++ b/projects/configs/tag-operations/pet.yaml @@ -0,0 +1,150 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Series Date + group: 0x0008 + element: 0x0021 + op: replace +- name: Acquisition Date + group: 0x0008 + element: 0x0022 + op: replace +- name: Series Time + group: 0x0008 + element: 0x0031 + op: replace +- name: Acquisition Time + group: 0x0008 + element: 0x0032 + op: replace +- name: Code Value + group: 0x0008 + element: 0x0100 + op: keep +- name: Coding Scheme Designator + group: 0x0008 + element: 0x0102 + op: keep +- name: Code Meaning + group: 0x0008 + element: 0x0104 + op: keep +- name: Mapping Resource + group: 0x0008 + element: 0x0105 + op: keep +- name: Context Group Version + group: 0x0008 + element: 0x0106 + op: keep +- name: Context Identifier + group: 0x0008 + element: 0x010f + op: keep +- name: Radiopharmaceutical + group: 0x0018 + element: 0x0031 + op: keep +- name: Slice Thickness + group: 0x0018 + element: 0x0050 + op: keep +- name: Radiopharmaceutical Start Time + group: 0x0018 + element: 0x1072 + op: keep +- name: Radionuclide Total Dose + group: 0x0018 + element: 0x1074 + op: keep +- name: Radionuclide Half Life + group: 0x0018 + element: 0x1075 + op: keep +- name: Radionuclide Positron Fraction + group: 0x0018 + element: 0x1076 + op: keep +- name: Radiopharmaceutical Start DateTime + group: 0x0018 + element: 0x1078 + op: keep +- name: Collimator Type + group: 0x0018 + element: 0x1181 + op: keep +- name: Actual Frame Duration + group: 0x0018 + element: 0x1242 + op: keep +- name: Corrected Image + group: 0x0028 + element: 0x0051 + op: keep +- name: Radiopharmaceutical Information Sequence + group: 0x0054 + element: 0x0016 + op: keep +- name: Number of Slices + group: 0x0054 + element: 0x0081 + op: keep +- name: Radionuclide Code Sequence + group: 0x0054 + element: 0x0300 + op: keep +- name: Radiopharmaceutical Code Sequence + group: 0x0054 + element: 0x0304 + op: keep +- name: Patient Orientation Code Sequence + group: 0x0054 + element: 0x0410 + op: keep +- name: Patient Orientation Modifier Code Sequence + group: 0x0054 + element: 
0x0412 + op: keep +- name: Patient Gantry Relationship Code Sequence + group: 0x0054 + element: 0x0414 + op: keep +- name: Series Type + group: 0x0054 + element: 0x1000 + op: keep +- name: Units + group: 0x0054 + element: 0x1001 + op: keep +- name: Counts Source + group: 0x0054 + element: 0x1002 + op: keep +- name: Decay Correction + group: 0x0054 + element: 0x1102 + op: keep +- name: Frame Reference Time + group: 0x0054 + element: 0x1300 + op: keep +- name: Decay Factor + group: 0x0054 + element: 0x1321 + op: keep +- name: Image Index + group: 0x0054 + element: 0x1330 + op: keep From ff2a8c38984f3b9d5a137796f0fe5b9acfef2c9c Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 12 Dec 2024 12:07:12 +0000 Subject: [PATCH 03/44] Add config for despiad --- projects/configs/despiad.yaml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 projects/configs/despiad.yaml diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml new file mode 100644 index 000000000..444b1549c --- /dev/null +++ b/projects/configs/despiad.yaml @@ -0,0 +1,33 @@ +# Copyright (c) 2024 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +project: + name: "despiad" + modalities: + - "CT" + - "PT" + +tag_operation_files: + base: + - "base.yaml" + #- "ct.yaml" + - "pet.yaml" + - "despiad.yaml" + manufacturer_overrides: [] + +series_filters: [] + +destination: + dicom: "none" + parquet: "none" From f20d5b8bf40804cf164141a0b594e5a798405738 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 12 Dec 2024 13:20:37 +0000 Subject: [PATCH 04/44] anonymise all resources before notifying the export api this is because resources sharing the same StudyInstanceUID in Orthanc Raw will be combined into a single resource in Orthanc Anon. Previously, we would try to export each resource after anonymisation, but this meant other resources sharing the same StudyInstanceUID were not exported. --- orthanc/orthanc-anon/plugin/pixl.py | 73 ++++++++++++++++++----------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index a9d10c641..39f7f9bcc 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -218,34 +218,24 @@ def ImportStudiesFromRaw(output, uri, **request): # noqa: ARG001 study_uids = payload["StudyInstanceUIDs"] project_name = payload["ProjectName"] - for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): - executor.submit(_import_study_from_raw, study_resource_id, study_uid, project_name) + executor.submit(_import_studies_from_raw, study_resource_ids, study_uids, project_name) response = json.dumps({"Message": "Ok"}) output.AnswerBuffer(response, "application/json") -def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: str) -> None: +def _import_studies_from_raw( + study_resource_ids: list[str], study_uids: list[str], project_name: str +) -> None: """ - Import a study from Orthanc Raw. 
- - Args: - study_resource_id: Resource ID of the study in Orthanc Raw - study_uid: Corresponding StudyInstanceUID - project_name: Name of the project - - - Pull a study from Orthanc Raw based on its resource ID - - Iterate over instances and anonymise them - - Re-upload the study via the dicom-web api - - Notify the PIXL export-api to send the study the to relevant endpoint for the project - + Import a list of studies from Orthanc Raw and optionally notify the export-api to send the + anonymised resources. """ - zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id) - - with ZipFile(zipped_study_bytes) as zipped_study: + anonymised_study_resource_ids = [] + for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): try: - anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances( - zipped_study=zipped_study, + resource_id = _import_study_from_raw( + study_resource_id=study_resource_id, study_uid=study_uid, project_name=project_name, ) @@ -255,19 +245,48 @@ def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: logger.exception("Failed to anonymize study: {} ", study_uid) return - _upload_instances(anonymised_instances_bytes) + anonymised_study_resource_ids.append(resource_id) if not should_export(): - logger.debug("Not exporting study {} as auto-routing is disabled", anonymised_study_uid) + logger.debug("Not exporting studies {} as auto-routing is disabled", study_uids) return - anonymised_study_resource_id = _get_study_resource_id(anonymised_study_uid) logger.debug( - "Notify export API to retrieve study resource. 
Original UID {} Anon UID: {}", - study_uid, - anonymised_study_uid, + "Notify export API to retrieve study resources {}", + anonymised_study_resource_ids, ) - send_study(study_id=anonymised_study_resource_id, project_name=project_name) + + for study_id in set(anonymised_study_resource_ids): + send_study(study_id=study_id, project_name=project_name) + + +def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: str) -> str: + """ + Import a study from Orthanc Raw. + + - Pull a study from Orthanc Raw based on its resource ID + - Iterate over instances and anonymise them + - Re-upload the study via the dicom-web api + + Args: + study_resource_id: Resource ID of the study in Orthanc Raw + study_uid: Corresponding StudyInstanceUID + project_name: Name of the project + + Returns: + Resource ID of the anonymised study in Orthanc Anon + + """ + zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id) + with ZipFile(zipped_study_bytes) as zipped_study: + anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances( + zipped_study=zipped_study, + study_uid=study_uid, + project_name=project_name, + ) + + _upload_instances(anonymised_instances_bytes) + return _get_study_resource_id(anonymised_study_uid) def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO: From d13b91688ceb0dc40ca710ce68a2be3997921424 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 12 Dec 2024 14:10:17 +0000 Subject: [PATCH 05/44] remove despiad.yaml from project config --- projects/configs/despiad.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 444b1549c..960d497e8 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -21,9 +21,8 @@ project: tag_operation_files: base: - "base.yaml" - #- "ct.yaml" + - "ct.yaml" - "pet.yaml" - - "despiad.yaml" manufacturer_overrides: [] series_filters: [] From 
01bf793d65f779c2a263114b80e5af1c8f7bc1f7 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 17 Dec 2024 13:27:00 +0000 Subject: [PATCH 06/44] generate label based on patient id and study count in xnat project --- pixl_core/src/core/uploader/_xnat.py | 62 +++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/pixl_core/src/core/uploader/_xnat.py b/pixl_core/src/core/uploader/_xnat.py index a8c345605..9415d8dcd 100644 --- a/pixl_core/src/core/uploader/_xnat.py +++ b/pixl_core/src/core/uploader/_xnat.py @@ -17,9 +17,14 @@ from __future__ import annotations import os +from io import BytesIO from typing import TYPE_CHECKING, BinaryIO, Optional +from zipfile import ZIP_DEFLATED, ZipFile import xnat +import xnat.datatypes +import xnat.type_hints +from pydicom import dcmread from core.uploader.base import Uploader @@ -77,6 +82,28 @@ def _upload_dicom_image( zip_content = get_study_zip_archive(study_id) self.upload_to_xnat(zip_content, study_tags) + def _split_zip_by_modality(self, zip_content: BinaryIO) -> dict[str, BinaryIO]: + """Split a zip file by modality.""" + zip_content_by_modality = {} + with ZipFile(zip_content) as zipped_study: + for file_info in zipped_study.infolist(): + with zipped_study.open(file_info) as file: + dataset = dcmread(file) + modality = dataset.Modality + patient_id = dataset.PatientID + label = f"{patient_id}_{modality}" + if label not in zip_content_by_modality: + zip_content_by_modality[label] = BytesIO() + with ZipFile( + zip_content_by_modality[label], "a", compression=ZIP_DEFLATED + ) as zipped_modality: + zipped_modality.writestr(file_info.filename, file.read()) + + for zipped_modality in zip_content_by_modality.values(): + zipped_modality.seek(0) + + return zip_content_by_modality + def upload_to_xnat( self, zip_content: BinaryIO, @@ -87,17 +114,48 @@ def upload_to_xnat( user=self.user, password=self.password, ) as session: + experiment = self._get_experiment_label( + session=session, + 
patient_id=study_tags.patient_id, + ) + session.services.import_( data=zip_content, overwrite=self.overwrite, - destination=self.destination, + destination=self.project_slug, project=self.project_slug, subject=study_tags.patient_id, - experiment=study_tags.pseudo_anon_image_id, + experiment=experiment, content_type="application/zip", import_handler="DICOM-zip", ) + def _get_experiment_label( + self, + session: xnat.XNATSession, + patient_id: str, + ) -> str: + """ + Create a unique experiment label based on the PatientID and number of existing DICOM studies + for the patient. + """ + project: xnat.mixin.ProjectData = session.projects[self.project_slug] + try: + subject: xnat.mixin.SubjectData = project.subjects[patient_id] + except KeyError: + n_archive_experiments = 0 + else: + n_archive_experiments = len(subject.experiments) + + n_prearchive_experiments = len( + session.prearchive.find( + project=self.project_slug, + subject=patient_id, + ) + ) + n_experiments = n_archive_experiments + n_prearchive_experiments + return f"{patient_id}_{n_experiments + 1}" + def upload_parquet_files(self, parquet_export: ParquetExport) -> None: # noqa: ARG002 msg = "XNATUploader does not support parquet files" raise NotImplementedError(msg) From 5a4713635a1a51193f05df3122cac728dd77c47f Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 17 Dec 2024 13:29:43 +0000 Subject: [PATCH 07/44] Use pseudo-anonymised StudyInstanceUID for xnat experiment label --- pixl_core/src/core/uploader/_xnat.py | 60 +--------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/pixl_core/src/core/uploader/_xnat.py b/pixl_core/src/core/uploader/_xnat.py index 9415d8dcd..56fc59fed 100644 --- a/pixl_core/src/core/uploader/_xnat.py +++ b/pixl_core/src/core/uploader/_xnat.py @@ -17,14 +17,9 @@ from __future__ import annotations import os -from io import BytesIO from typing import TYPE_CHECKING, BinaryIO, Optional -from zipfile import ZIP_DEFLATED, ZipFile import xnat -import 
xnat.datatypes -import xnat.type_hints -from pydicom import dcmread from core.uploader.base import Uploader @@ -82,28 +77,6 @@ def _upload_dicom_image( zip_content = get_study_zip_archive(study_id) self.upload_to_xnat(zip_content, study_tags) - def _split_zip_by_modality(self, zip_content: BinaryIO) -> dict[str, BinaryIO]: - """Split a zip file by modality.""" - zip_content_by_modality = {} - with ZipFile(zip_content) as zipped_study: - for file_info in zipped_study.infolist(): - with zipped_study.open(file_info) as file: - dataset = dcmread(file) - modality = dataset.Modality - patient_id = dataset.PatientID - label = f"{patient_id}_{modality}" - if label not in zip_content_by_modality: - zip_content_by_modality[label] = BytesIO() - with ZipFile( - zip_content_by_modality[label], "a", compression=ZIP_DEFLATED - ) as zipped_modality: - zipped_modality.writestr(file_info.filename, file.read()) - - for zipped_modality in zip_content_by_modality.values(): - zipped_modality.seek(0) - - return zip_content_by_modality - def upload_to_xnat( self, zip_content: BinaryIO, @@ -114,48 +87,17 @@ def upload_to_xnat( user=self.user, password=self.password, ) as session: - experiment = self._get_experiment_label( - session=session, - patient_id=study_tags.patient_id, - ) - session.services.import_( data=zip_content, overwrite=self.overwrite, destination=self.project_slug, project=self.project_slug, subject=study_tags.patient_id, - experiment=experiment, + experiment=study_tags.pseudo_anon_image_id, content_type="application/zip", import_handler="DICOM-zip", ) - def _get_experiment_label( - self, - session: xnat.XNATSession, - patient_id: str, - ) -> str: - """ - Create a unique experiment label based on the PatientID and number of existing DICOM studies - for the patient. 
- """ - project: xnat.mixin.ProjectData = session.projects[self.project_slug] - try: - subject: xnat.mixin.SubjectData = project.subjects[patient_id] - except KeyError: - n_archive_experiments = 0 - else: - n_archive_experiments = len(subject.experiments) - - n_prearchive_experiments = len( - session.prearchive.find( - project=self.project_slug, - subject=patient_id, - ) - ) - n_experiments = n_archive_experiments + n_prearchive_experiments - return f"{patient_id}_{n_experiments + 1}" - def upload_parquet_files(self, parquet_export: ParquetExport) -> None: # noqa: ARG002 msg = "XNATUploader does not support parquet files" raise NotImplementedError(msg) From 28f92a69d5ff239e310541e2638bb8e543615e8a Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 17 Dec 2024 13:38:49 +0000 Subject: [PATCH 08/44] Fix XNAT destination --- pixl_core/src/core/uploader/_xnat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixl_core/src/core/uploader/_xnat.py b/pixl_core/src/core/uploader/_xnat.py index 56fc59fed..a8c345605 100644 --- a/pixl_core/src/core/uploader/_xnat.py +++ b/pixl_core/src/core/uploader/_xnat.py @@ -90,7 +90,7 @@ def upload_to_xnat( session.services.import_( data=zip_content, overwrite=self.overwrite, - destination=self.project_slug, + destination=self.destination, project=self.project_slug, subject=study_tags.patient_id, experiment=study_tags.pseudo_anon_image_id, From 4fd226a053d8c5f201d5ffde1de0692650fc829c Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 19 Dec 2024 12:49:33 +0000 Subject: [PATCH 09/44] remove changes related to grouping resources before notifying export api it's been addressed in another PR --- orthanc/orthanc-anon/plugin/pixl.py | 73 +++++++++++------------------ 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 39f7f9bcc..a9d10c641 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ 
-218,24 +218,34 @@ def ImportStudiesFromRaw(output, uri, **request): # noqa: ARG001 study_uids = payload["StudyInstanceUIDs"] project_name = payload["ProjectName"] - executor.submit(_import_studies_from_raw, study_resource_ids, study_uids, project_name) + for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): + executor.submit(_import_study_from_raw, study_resource_id, study_uid, project_name) response = json.dumps({"Message": "Ok"}) output.AnswerBuffer(response, "application/json") -def _import_studies_from_raw( - study_resource_ids: list[str], study_uids: list[str], project_name: str -) -> None: +def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: str) -> None: """ - Import a list of studies from Orthanc Raw and optionally notify the export-api to send the - anonymised resources. + Import a study from Orthanc Raw. + + Args: + study_resource_id: Resource ID of the study in Orthanc Raw + study_uid: Corresponding StudyInstanceUID + project_name: Name of the project + + - Pull a study from Orthanc Raw based on its resource ID + - Iterate over instances and anonymise them + - Re-upload the study via the dicom-web api + - Notify the PIXL export-api to send the study the to relevant endpoint for the project + """ - anonymised_study_resource_ids = [] - for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): + zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id) + + with ZipFile(zipped_study_bytes) as zipped_study: try: - resource_id = _import_study_from_raw( - study_resource_id=study_resource_id, + anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances( + zipped_study=zipped_study, study_uid=study_uid, project_name=project_name, ) @@ -245,48 +255,19 @@ def _import_studies_from_raw( logger.exception("Failed to anonymize study: {} ", study_uid) return - anonymised_study_resource_ids.append(resource_id) + 
_upload_instances(anonymised_instances_bytes) if not should_export(): - logger.debug("Not exporting studies {} as auto-routing is disabled", study_uids) + logger.debug("Not exporting study {} as auto-routing is disabled", anonymised_study_uid) return + anonymised_study_resource_id = _get_study_resource_id(anonymised_study_uid) logger.debug( - "Notify export API to retrieve study resources {}", - anonymised_study_resource_ids, + "Notify export API to retrieve study resource. Original UID {} Anon UID: {}", + study_uid, + anonymised_study_uid, ) - - for study_id in set(anonymised_study_resource_ids): - send_study(study_id=study_id, project_name=project_name) - - -def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: str) -> str: - """ - Import a study from Orthanc Raw. - - - Pull a study from Orthanc Raw based on its resource ID - - Iterate over instances and anonymise them - - Re-upload the study via the dicom-web api - - Args: - study_resource_id: Resource ID of the study in Orthanc Raw - study_uid: Corresponding StudyInstanceUID - project_name: Name of the project - - Returns: - Resource ID of the anonymised study in Orthanc Anon - - """ - zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id) - with ZipFile(zipped_study_bytes) as zipped_study: - anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances( - zipped_study=zipped_study, - study_uid=study_uid, - project_name=project_name, - ) - - _upload_instances(anonymised_instances_bytes) - return _get_study_resource_id(anonymised_study_uid) + send_study(study_id=anonymised_study_resource_id, project_name=project_name) def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO: From c2fcbe4ce68aa0fbdb0b5ec7621924d636da2aba Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 19 Dec 2024 13:14:24 +0000 Subject: [PATCH 10/44] remove duplicated tags --- projects/configs/tag-operations/ct.yaml | 8 -------- 1 file changed, 8 deletions(-) diff 
--git a/projects/configs/tag-operations/ct.yaml b/projects/configs/tag-operations/ct.yaml index cdf718ed2..a59f06aef 100644 --- a/projects/configs/tag-operations/ct.yaml +++ b/projects/configs/tag-operations/ct.yaml @@ -16,11 +16,3 @@ group: 0x0008 element: 0x0064 op: keep -- name: Slice Thickness - group: 0x0018 - element: 0x0050 - op: keep -- name: KVP - group: 0x0018 - element: 0x0060 - op: keep From 5d3c69d72c92a764b2ecb77aa6a4610065db36b6 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 2 Jan 2025 12:50:59 +0000 Subject: [PATCH 11/44] Add series_number_filters and allowed_manufacturers parameters to pixl project config --- .../core/project_config/pixl_config_model.py | 41 ++++++++++++++++--- .../project_config/test_project_config.py | 2 +- pixl_dcmd/src/pixl_dcmd/main.py | 25 ++++++++++- projects/configs/despiad.yaml | 2 + 4 files changed, 61 insertions(+), 9 deletions(-) diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 407daee61..8b1aff181 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -16,6 +16,7 @@ from __future__ import annotations +import re from enum import Enum from pathlib import Path from typing import Any, Optional @@ -133,20 +134,48 @@ class PixlConfig(BaseModel): """Project-specific configuration for Pixl.""" project: _Project - series_filters: Optional[list[str]] = None + series_filters: Optional[ + list[str] + ] = [] # pydantic is clever and makes a deep copy of the empty default list + series_number_filters: Optional[list[str]] = [] + allowed_manufacturers: Optional[str] = ".*" tag_operation_files: TagOperationFiles destination: _Destination - def is_series_excluded(self, series_description: str) -> bool: + def is_series_description_excluded(self, series_description: str | None) -> bool: """ - Return whether this config excludes the series with the given description + Return 
whether this config excludes the series with the given description. + + Do a simple case-insensitive substring check - this data is ultimately typed by a human, and + different image sources may have different conventions for case conversion. + :param series_description: the series description to test :returns: True if it should be excluded, False if not """ - if self.series_filters is None or series_description is None: + if not self.series_filters or series_description is None: return False - # Do a simple case-insensitive substring check - this data is ultimately typed by a human, - # and different image sources may have different conventions for case conversion. + return any( series_description.upper().find(filt.upper()) != -1 for filt in self.series_filters ) + + def is_series_number_excluded(self, series_number: str | None) -> bool: + """ + Return whether this config excludes the series with the given number + + :param series_number: the series number to test + :returns: True if it should be excluded, False if not + """ + if not self.series_number_filters or series_number is None: + return False + + return any(series_number.find(filt) != -1 for filt in self.series_number_filters) + + def is_manufacturer_allowed(self, manufacturer: str) -> bool: + """ + Check whether the manufacturer is in the allow-list. 
+ + :param manufacturer: name of the manufacturer + :returns: True is the manufacturer is allowed, False if not + """ + return bool(re.search(rf"{self.allowed_manufacturers}", manufacturer, flags=re.IGNORECASE)) diff --git a/pixl_core/tests/project_config/test_project_config.py b/pixl_core/tests/project_config/test_project_config.py index 56ae5964d..0a8c6722e 100644 --- a/pixl_core/tests/project_config/test_project_config.py +++ b/pixl_core/tests/project_config/test_project_config.py @@ -181,4 +181,4 @@ def test_series_filtering(base_yaml_data, series_filters, test_series_desc, expe if series_filters is not None: base_yaml_data["series_filters"] = series_filters cfg = PixlConfig.model_validate(base_yaml_data) - assert cfg.is_series_excluded(test_series_desc) == expect_exclude + assert cfg.is_series_description_excluded(test_series_desc) == expect_exclude diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index c69f73dee..2a6a3943e 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -57,13 +57,31 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes: def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: + """ + Check whether the dataset series should be exlucded based on its description + and number. 
+ """ series_description = dataset.get("SeriesDescription") - if cfg.is_series_excluded(series_description): + if cfg.is_series_description_excluded(series_description): logger.info("FILTERING OUT series description: {}", series_description) return True + + series_number = dataset.get("SeriesNumber") + if cfg.is_series_number_excluded(series_number): + logger.info("FILTERING OUT series number: {}", series_number) + return True + return False +def _should_exclude_manufacturer(dataset: Dataset, cfg: PixlConfig) -> bool: + manufacturer = dataset.get("Manufacturer") + should_exclude = not cfg.is_manufacturer_allowed(manufacturer=manufacturer) + if should_exclude: + logger.info("FILTERING out manufacturer: {}", manufacturer) + return should_exclude + + def anonymise_dicom_and_update_db( dataset: Dataset, *, @@ -130,10 +148,13 @@ def anonymise_dicom( ) # Do before anonymisation in case someone decides to delete the - # Series Description tag as part of anonymisation. + # Series Description or Manufacturer tags as part of anonymisation. 
if _should_exclude_series(dataset, config): msg = "DICOM instance discarded due to its series description" raise PixlSkipInstanceError(msg) + if _should_exclude_manufacturer(dataset, config): + msg = "DICOM instance discarded due to its manufacturer" + raise PixlSkipInstanceError(msg) if dataset.Modality not in config.project.modalities: msg = f"Dropping DICOM Modality: {dataset.Modality}" raise PixlSkipInstanceError(msg) diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 960d497e8..991f3ab8c 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -26,6 +26,8 @@ tag_operation_files: manufacturer_overrides: [] series_filters: [] +series_number_filters: [] +allowed_manufacturers: ".*" destination: dicom: "none" From 32dd84fdd983bbd75def33944257f4a944f65ccb Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 6 Jan 2025 09:39:51 +0000 Subject: [PATCH 12/44] clarify docstring of _import_study_from_raw --- orthanc/orthanc-anon/plugin/pixl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index a9d10c641..77a651140 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -234,9 +234,9 @@ def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: study_uid: Corresponding StudyInstanceUID project_name: Name of the project - - Pull a study from Orthanc Raw based on its resource ID + - Download a study from Orthanc Raw based on its resource ID - Iterate over instances and anonymise them - - Re-upload the study via the dicom-web api + - Upload the study to Orthanc Anon via the dicom-web api - Notify the PIXL export-api to send the study the to relevant endpoint for the project """ From 7867eff64c784cc6ccff9ca3b3fd5b1bbae33b67 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 6 Jan 2025 10:23:48 +0000 Subject: [PATCH 13/44] Add min_instances_per_series parameter to 
project config In orthanc anon plugin, skip series that have fewer than min_instances_per_series instances --- README.md | 6 +++ orthanc/orthanc-anon/plugin/pixl.py | 38 +++++++++++++++++++ .../core/project_config/pixl_config_model.py | 5 +-- projects/configs/despiad.yaml | 1 + 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e9a30860d..487370ad8 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,12 @@ The configuration file defines: - Project name: the `` name of the Project - The DICOM dataset modalities to retain (e.g. `["DX", "CR"]` for X-Ray studies) +- The minimum number of instances required by a series (defaults to 1). May be set higher than 1 to filter out + series with a single screenshot containing patient identifiable data +- A list of series description filters (e.g. `['loc', 'pos']`). Series with descriptions matching any of these + filters will be skipped +- A list of series number filters (e.g. `[3, 4]`). Series with SeriesNumber matching any of these filters will + be skipped - The [anonymisation operations](/pixl_dcmd/README.md#tag-scheme-anonymisation) to be applied to the DICOM tags, by providing a file path to one or multiple YAML files. We currently allow two types of files: diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 77a651140..e266e16da 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -322,13 +322,27 @@ def _anonymise_study_instances( Return a list of the bytes of anonymised instances, and the anonymised StudyInstanceUID. 
""" config = load_project_config(project_name) + series_to_skip = ( + get_series_to_skip(study_uid, config.min_instances_per_series) + if config.min_instances_per_series > 1 + else set() + ) anonymised_instances_bytes = [] + logger.debug("Zipped study infolist: {}", zipped_study.infolist()) for file_info in zipped_study.infolist(): with zipped_study.open(file_info) as file: logger.debug("Reading file {}", file) dataset = dcmread(file) + if dataset.SeriesInstanceUID in series_to_skip: + logger.debug( + "Skipping series {} for study {} due to too few instances", + dataset.SeriesInstanceUID, + study_uid, + ) + continue + logger.info("Anonymising file: {} for study: {}", file, study_uid) try: anonymised_instances_bytes.append(_anonymise_dicom_instance(dataset, config)) @@ -350,6 +364,30 @@ def _anonymise_study_instances( return anonymised_instances_bytes, anonymised_study_uid +def get_series_to_skip(zipped_study: ZipFile, min_instances: int) -> set[str]: + """ + Determine which series to skip based on the number of instances in the series. + + If a series has fewer instances than `min_instances`, add it to a set of series to skip. 
+ + Args: + zipped_study: ZipFile containing the study + min_instances: Minimum number of instances required to include a series + + """ + series_instances = {} + for file_info in zipped_study.infolist(): + with zipped_study.open(file_info) as file: + logger.debug("Reading file {}", file) + dataset = dcmread(file) + if dataset.SeriesInstanceUID not in series_instances: + series_instances[dataset.SeriesInstanceUID] = 1 + continue + series_instances[dataset.SeriesInstanceUID] += 1 + + return {series for series, count in series_instances.items() if count < min_instances} + + def _anonymise_dicom_instance(dataset: pydicom.Dataset, config: PixlConfig) -> bytes: """Anonymise a DICOM instance.""" anonymise_dicom_and_update_db(dataset, config=config) diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 8b1aff181..9428b6ff3 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -134,9 +134,8 @@ class PixlConfig(BaseModel): """Project-specific configuration for Pixl.""" project: _Project - series_filters: Optional[ - list[str] - ] = [] # pydantic is clever and makes a deep copy of the empty default list + min_instances_per_series: Optional[int] = 1 + series_filters: Optional[list[str]] = [] # pydantic makes a deep copy of the empty default list series_number_filters: Optional[list[str]] = [] allowed_manufacturers: Optional[str] = ".*" tag_operation_files: TagOperationFiles diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 991f3ab8c..c2b31d5fd 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -25,6 +25,7 @@ tag_operation_files: - "pet.yaml" manufacturer_overrides: [] +min_instances_per_series: 1 series_filters: [] series_number_filters: [] allowed_manufacturers: ".*" From 4cd4c7eb054a216e3ad74c42172617423fa3ad4b Mon Sep 17 00:00:00 2001 From: Paul Smith Date: 
Wed, 8 Jan 2025 10:02:29 +0000 Subject: [PATCH 14/44] Keep study date and patient dob for despiad --- projects/configs/despiad.yaml | 1 + projects/configs/tag-operations/despiad.yaml | 22 ++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 projects/configs/tag-operations/despiad.yaml diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index c2b31d5fd..f0e312ee7 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -23,6 +23,7 @@ tag_operation_files: - "base.yaml" - "ct.yaml" - "pet.yaml" + - "despiad.yaml" manufacturer_overrides: [] min_instances_per_series: 1 diff --git a/projects/configs/tag-operations/despiad.yaml b/projects/configs/tag-operations/despiad.yaml new file mode 100644 index 000000000..3841d207d --- /dev/null +++ b/projects/configs/tag-operations/despiad.yaml @@ -0,0 +1,22 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Study Date + group: 0x0008 + element: 0x0020 + op: keep +- name: Patient's Birth Date + group: 0x0010 + element: 0x0030 + op: keep From 397c6b1980f3ee5f23a2d8adef18bdb77e06a284 Mon Sep 17 00:00:00 2001 From: davecash75 Date: Mon, 13 Jan 2025 11:38:12 +0000 Subject: [PATCH 15/44] Changes after reviewing the PET data for DESPIAD (#592) --- projects/configs/tag-operations/ct.yaml | 105 +++++++++++++++++++ projects/configs/tag-operations/despiad.yaml | 20 ++++ projects/configs/tag-operations/pet.yaml | 76 ++++++++++++-- 3 files changed, 195 insertions(+), 6 deletions(-) diff --git a/projects/configs/tag-operations/ct.yaml b/projects/configs/tag-operations/ct.yaml index a59f06aef..abbc28fc9 100644 --- a/projects/configs/tag-operations/ct.yaml +++ b/projects/configs/tag-operations/ct.yaml @@ -16,3 +16,108 @@ group: 0x0008 element: 0x0064 op: keep + +- name: Spacing Between Slices + group: 0x0018 + element: 0x0088 + op: keep + +- name: Data Collection Diameter + group: 0x0018 + element: 0x0090 + op: keep + +- name: Reconstruction Diameter + group: 0x0018 + element: 0x1100 + op: keep + +- name: Distance Source to Detector + group: 0x0018 + element: 0x1110 + op: keep + +- name: Distance Source to Patient + group: 0x0018 + element: 0x1111 + op: keep + +- name: Gantry Detector Tilt + group: 0x0018 + element: 0x1120 + op: keep + +- name: Table Height + group: 0x0018 + element: 0x1130 + op: keep + +- name: Rotation Direction + group: 0x0018 + element: 0x1140 + op: keep + +- name: Exposure Time + group: 0x0018 + element: 0x1150 + op: keep + +- name: X-Ray Tube Current + group: 0x0018 + element: 0x1151 + op: keep + +- name: Exposure + group: 0x0018 + element: 0x1152 + op: keep + +- name: Filter Type + group: 0x0018 + element: 0x1160 + op: keep + +- name: Generator Power + group: 0x0018 + element: 0x1170 + op: keep + +- name: Convolution Kernel + group: 0x0018 + element: 0x1210 + op: keep + +- name: Revolution Time + group: 0x0018 + element: 0x9305 + op: keep + +- name: 
Single Collimation Width + group: 0x0018 + element: 0x9306 + op: keep + +- name: Total Collimation Width + group: 0x0018 + element: 0x9307 + op: keep + +- name: Table Speed + group: 0x0018 + element: 0x9309 + op: keep + +- name: Table Feed per Rotation + group: 0x0018 + element: 0x9310 + op: keep + +- name: Spiral Pitch Factor + group: 0x0018 + element: 0x9311 + op: keep + +- name: Slice Location + group: 0x0020 + element: 0x1041 + op: keep \ No newline at end of file diff --git a/projects/configs/tag-operations/despiad.yaml b/projects/configs/tag-operations/despiad.yaml index 3841d207d..35fb2587a 100644 --- a/projects/configs/tag-operations/despiad.yaml +++ b/projects/configs/tag-operations/despiad.yaml @@ -16,6 +16,26 @@ group: 0x0008 element: 0x0020 op: keep +- name: Series Date + group: 0x0008 + element: 0x0021 + op: keep +- name: Acquisition Date + group: 0x0008 + element: 0x0022 + op: keep +- name: Series Time + group: 0x0008 + element: 0x0031 + op: keep +- name: Acquisition Time + group: 0x0008 + element: 0x0032 + op: keep +- name: Station Name + group: 0x0008 + element: 0x1010 + op: keep - name: Patient's Birth Date group: 0x0010 element: 0x0030 diff --git a/projects/configs/tag-operations/pet.yaml b/projects/configs/tag-operations/pet.yaml index fe41cacd5..952234cc3 100644 --- a/projects/configs/tag-operations/pet.yaml +++ b/projects/configs/tag-operations/pet.yaml @@ -23,11 +23,11 @@ - name: Series Time group: 0x0008 element: 0x0031 - op: replace + op: keep - name: Acquisition Time group: 0x0008 element: 0x0032 - op: replace + op: keep - name: Code Value group: 0x0008 element: 0x0100 @@ -52,6 +52,14 @@ group: 0x0008 element: 0x010f op: keep +- name: Patients Size + group: 0x0010 + element: 0x1020 + op: keep +- name: Patients Weight + group: 0x0010 + element: 0x1030 + op: keep - name: Radiopharmaceutical group: 0x0018 element: 0x0031 @@ -64,6 +72,10 @@ group: 0x0018 element: 0x1072 op: keep +- name: Radiopharmaceutical Stop Time + group: 0x0018 + element: 
0x1073 + op: keep - name: Radionuclide Total Dose group: 0x0018 element: 0x1074 @@ -76,10 +88,6 @@ group: 0x0018 element: 0x1076 op: keep -- name: Radiopharmaceutical Start DateTime - group: 0x0018 - element: 0x1078 - op: keep - name: Collimator Type group: 0x0018 element: 0x1181 @@ -132,18 +140,74 @@ group: 0x0054 element: 0x1002 op: keep +- name: Randoms Correction Method + group: 0x0054 + element: 0x1100 + op: keep +- name: Attenuation Correction Method + group: 0x0054 + element: 0x1101 + op: keep - name: Decay Correction group: 0x0054 element: 0x1102 op: keep +- name: Reconstruction Method + group: 0x0054 + element: 0x1103 + op: keep +- name: Detector Lines of Response Used + group: 0x0054 + element: 0x1104 + op: keep +- name: Scatter Correction Method + group: 0x0054 + element: 0x1105 + op: keep +- name: Axial Mash + group: 0x0054 + element: 0x1201 + op: keep +- name: Transverse Mash + group: 0x0054 + element: 0x1202 + op: keep +- name: Coincidence Window Width + group: 0x0054 + element: 0x1210 + op: keep - name: Frame Reference Time group: 0x0054 element: 0x1300 op: keep +- name: Primary Prompts Counts Accumulated + group: 0x0054 + element: 0x1310 + op: keep +- name: Secondary Counts Accumulated + group: 0x0054 + element: 0x1311 + op: keep +- name: Slice Sensitivity Factor + group: 0x0054 + element: 0x1320 + op: keep - name: Decay Factor group: 0x0054 element: 0x1321 op: keep +- name: Dose Calibration Factor + group: 0x0054 + element: 0x1322 + op: keep +- name: Scatter Fraction Factor + group: 0x0054 + element: 0x1323 + op: keep +- name: Dead Time Factor + group: 0x0054 + element: 0x1324 + op: keep - name: Image Index group: 0x0054 element: 0x1330 From b6bcb3c10c29cea17011a37b109e89573ce27c19 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 13 Jan 2025 13:15:36 +0000 Subject: [PATCH 16/44] Add Radiopharmaceutical Start DateTime to pet.yaml --- projects/configs/tag-operations/pet.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/projects/configs/tag-operations/pet.yaml b/projects/configs/tag-operations/pet.yaml index 952234cc3..0762391e2 100644 --- a/projects/configs/tag-operations/pet.yaml +++ b/projects/configs/tag-operations/pet.yaml @@ -88,6 +88,10 @@ group: 0x0018 element: 0x1076 op: keep +- name: Radiopharmaceutical Start DateTime + group: 0x0018 + element: 0x1078 + op: replace - name: Collimator Type group: 0x0018 element: 0x1181 From ffeb70da0a8844135b99c217f8135eeee60c7deb Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 13 Jan 2025 13:16:06 +0000 Subject: [PATCH 17/44] remove blank lines from ct.yaml --- projects/configs/tag-operations/ct.yaml | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/projects/configs/tag-operations/ct.yaml b/projects/configs/tag-operations/ct.yaml index abbc28fc9..dd750eb1b 100644 --- a/projects/configs/tag-operations/ct.yaml +++ b/projects/configs/tag-operations/ct.yaml @@ -16,108 +16,87 @@ group: 0x0008 element: 0x0064 op: keep - - name: Spacing Between Slices group: 0x0018 element: 0x0088 op: keep - - name: Data Collection Diameter group: 0x0018 element: 0x0090 op: keep - - name: Reconstruction Diameter group: 0x0018 element: 0x1100 op: keep - - name: Distance Source to Detector group: 0x0018 element: 0x1110 op: keep - - name: Distance Source to Patient group: 0x0018 element: 0x1111 op: keep - - name: Gantry Detector Tilt group: 0x0018 element: 0x1120 op: keep - - name: Table Height group: 0x0018 element: 0x1130 op: keep - - name: Rotation Direction group: 0x0018 element: 0x1140 op: keep - - name: Exposure Time group: 0x0018 element: 0x1150 op: keep - - name: X-Ray Tube Current group: 0x0018 element: 0x1151 op: keep - - name: Exposure group: 0x0018 element: 0x1152 op: keep - - name: Filter Type group: 0x0018 element: 0x1160 op: keep - - name: Generator Power group: 0x0018 element: 0x1170 op: keep - - name: Convolution Kernel group: 0x0018 element: 0x1210 op: keep - - name: Revolution Time group: 0x0018 
element: 0x9305 op: keep - - name: Single Collimation Width group: 0x0018 element: 0x9306 op: keep - - name: Total Collimation Width group: 0x0018 element: 0x9307 op: keep - - name: Table Speed group: 0x0018 element: 0x9309 op: keep - - name: Table Feed per Rotation group: 0x0018 element: 0x9310 op: keep - - name: Spiral Pitch Factor group: 0x0018 element: 0x9311 op: keep - - name: Slice Location group: 0x0020 element: 0x1041 - op: keep \ No newline at end of file + op: keep From f6095a85a55fa375eba1b163cdc15629e0132c93 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 13 Jan 2025 13:31:32 +0000 Subject: [PATCH 18/44] remove tab from config file --- projects/configs/tag-operations/pet.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/configs/tag-operations/pet.yaml b/projects/configs/tag-operations/pet.yaml index 0762391e2..151399e5d 100644 --- a/projects/configs/tag-operations/pet.yaml +++ b/projects/configs/tag-operations/pet.yaml @@ -55,7 +55,7 @@ - name: Patients Size group: 0x0010 element: 0x1020 - op: keep + op: keep - name: Patients Weight group: 0x0010 element: 0x1030 From b231a6089d7157d2f9bf959d6ecd0e7ce37d7d2a Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 14 Jan 2025 12:17:38 +0000 Subject: [PATCH 19/44] filter series number by manufacturer also default to allowing no manufacturers --- cli/src/pixl_cli/_config.py | 2 +- .../core/project_config/pixl_config_model.py | 43 ++++++++++++++++--- pixl_dcmd/src/pixl_dcmd/main.py | 11 ++++- projects/configs/despiad.yaml | 6 ++- 4 files changed, 50 insertions(+), 12 deletions(-) diff --git a/cli/src/pixl_cli/_config.py b/cli/src/pixl_cli/_config.py index 7a4f578cc..8337f7578 100644 --- a/cli/src/pixl_cli/_config.py +++ b/cli/src/pixl_cli/_config.py @@ -35,7 +35,7 @@ "password": config("PIXL_DB_PASSWORD"), "database": config("PIXL_DB_NAME"), }, -} # type: dict +} class APIConfig: diff --git a/pixl_core/src/core/project_config/pixl_config_model.py 
b/pixl_core/src/core/project_config/pixl_config_model.py index 9428b6ff3..bf2edfea5 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -62,6 +62,16 @@ class _Project(BaseModel): modalities: list[str] +class Manufacturer(BaseModel): + """ + An allowed manufacturer for a project. + Also defines which series numbers to exclude for this manufacturer. + """ + + regex: str = "no manufacturers allowed ^" + exclude_series_numbers: list[str] = [] + + class TagOperationFiles(BaseModel): """Tag operations files for a project. At least a base file is required.""" @@ -136,8 +146,7 @@ class PixlConfig(BaseModel): project: _Project min_instances_per_series: Optional[int] = 1 series_filters: Optional[list[str]] = [] # pydantic makes a deep copy of the empty default list - series_number_filters: Optional[list[str]] = [] - allowed_manufacturers: Optional[str] = ".*" + allowed_manufacturers: list[Manufacturer] = [Manufacturer()] tag_operation_files: TagOperationFiles destination: _Destination @@ -158,17 +167,20 @@ def is_series_description_excluded(self, series_description: str | None) -> bool series_description.upper().find(filt.upper()) != -1 for filt in self.series_filters ) - def is_series_number_excluded(self, series_number: str | None) -> bool: + def is_series_number_excluded(self, manufacturer: str, series_number: str | None) -> bool: """ - Return whether this config excludes the series with the given number + Return whether this config excludes the series with the given number for the given + manufacturer. 
+ :param manufacturer: the manufacturer to test :param series_number: the series number to test :returns: True if it should be excluded, False if not """ - if not self.series_number_filters or series_number is None: + if not self.is_manufacturer_allowed(manufacturer) or series_number is None: return False - return any(series_number.find(filt) != -1 for filt in self.series_number_filters) + exclude_series_numbers = self.get_manufacturer(manufacturer).exclude_series_numbers + return any(series_number.find(filt) != -1 for filt in exclude_series_numbers) def is_manufacturer_allowed(self, manufacturer: str) -> bool: """ @@ -177,4 +189,21 @@ def is_manufacturer_allowed(self, manufacturer: str) -> bool: :param manufacturer: name of the manufacturer :returns: True is the manufacturer is allowed, False if not """ - return bool(re.search(rf"{self.allowed_manufacturers}", manufacturer, flags=re.IGNORECASE)) + for manufacturer_config in self.allowed_manufacturers: + if re.search(rf"{manufacturer_config.regex}", manufacturer, flags=re.IGNORECASE): + return True + return False + + def get_manufacturer(self, manufacturer: str) -> Manufacturer: + """ + Get the manufacturer configuration for the given manufacturer. 
+ + :param manufacturer: name of the manufacturer + :returns: Manufacturer configuration + :raises: ValueError: if the manufacturer is not allowed + """ + for manufacturer_config in self.allowed_manufacturers: + if re.search(rf"{manufacturer_config.regex}", manufacturer, flags=re.IGNORECASE): + return manufacturer_config + msg = f"Manufacturer {manufacturer} is not allowed by project {self.project.name}" + raise ValueError(msg) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index ac0e93a87..e278bae1d 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -66,9 +66,16 @@ def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: logger.debug("FILTERING OUT series description: {}", series_description) return True + manufacturer = dataset.get("Manufacturer") series_number = dataset.get("SeriesNumber") - if cfg.is_series_number_excluded(series_number): - logger.debug("FILTERING OUT series number: {}", series_number) + if cfg.is_series_number_excluded( + manufacturer=manufacturer, series_number=series_number + ): + logger.debug( + "FILTERING OUT series number: {} for manufacturer: {}", + series_number, + manufacturer, + ) return True return False diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index f0e312ee7..5637e0e38 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -26,10 +26,12 @@ tag_operation_files: - "despiad.yaml" manufacturer_overrides: [] +allowed_manufacturers: + - manufacturer: "" + exclude_series_numbers: [] + min_instances_per_series: 1 series_filters: [] -series_number_filters: [] -allowed_manufacturers: ".*" destination: dicom: "none" From 5a9c52eb2711c4ca02a7c842083614fd5670990e Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 14 Jan 2025 13:32:08 +0000 Subject: [PATCH 20/44] Add allowed_manufacturers for all test configs --- pixl_dcmd/tests/test_main.py | 4 +++- projects/configs/despiad.yaml | 4 ---- 
projects/configs/test-external-user.yaml | 6 ++++++ projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml | 4 ++++ projects/configs/test-extract-uclh-omop-cdm-xnat.yaml | 4 ++++ projects/configs/test-extract-uclh-omop-cdm.yaml | 4 ++++ projects/configs/test-mr-spectroscopy.yaml | 4 ++++ projects/configs/test-radiotherapy.yaml | 4 ++++ 8 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 78cb2f87d..a8afb88e1 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -194,7 +194,9 @@ def ids_for_parameterised_test(val: pathlib.Path) -> str: @pytest.mark.parametrize( - ("yaml_file"), PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test + ("yaml_file"), + PROJECT_CONFIGS_DIR.glob("test-*.yaml"), + ids=ids_for_parameterised_test, ) def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: """ diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 5637e0e38..a9c19d87f 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -26,10 +26,6 @@ tag_operation_files: - "despiad.yaml" manufacturer_overrides: [] -allowed_manufacturers: - - manufacturer: "" - exclude_series_numbers: [] - min_instances_per_series: 1 series_filters: [] diff --git a/projects/configs/test-external-user.yaml b/projects/configs/test-external-user.yaml index ad4734d07..c710b2adb 100644 --- a/projects/configs/test-external-user.yaml +++ b/projects/configs/test-external-user.yaml @@ -23,6 +23,12 @@ tag_operation_files: - "diffusion-weighted-mri.yaml" manufacturer_overrides: ["mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^siemens" + exclude_series_numbers: [] + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml index a2463fb51..7b8fdc884 100644 
--- a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml @@ -24,6 +24,10 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml index 2ed67c450..d452ad665 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml @@ -24,6 +24,10 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index 52199ef1e..a2e097d1a 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -24,6 +24,10 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-mr-spectroscopy.yaml b/projects/configs/test-mr-spectroscopy.yaml index 6571deb1e..9d38d47ac 100644 --- a/projects/configs/test-mr-spectroscopy.yaml +++ b/projects/configs/test-mr-spectroscopy.yaml @@ -24,6 +24,10 @@ tag_operation_files: manufacturer_overrides: - "mri.yaml" +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-radiotherapy.yaml b/projects/configs/test-radiotherapy.yaml index 029610699..ea31c4257 100644 --- a/projects/configs/test-radiotherapy.yaml +++ 
b/projects/configs/test-radiotherapy.yaml @@ -28,6 +28,10 @@ tag_operation_files: - "rt-struct.yaml" manufacturer_overrides: null +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" From f7d94e1dd3d08bbf4b1c5d5af3d1778baf464993 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 15 Jan 2025 09:21:51 +0000 Subject: [PATCH 21/44] Count number of instances skipped due to series having too few instances --- orthanc/orthanc-anon/plugin/pixl.py | 4 +++- pixl_dcmd/src/pixl_dcmd/main.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 5bdb08428..af0089896 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -342,11 +342,13 @@ def _anonymise_study_instances( dataset = dcmread(file) if dataset.SeriesInstanceUID in series_to_skip: - logger.warning( + logger.debug( "Skipping series {} for study {} due to too few instances", dataset.SeriesInstanceUID, study_info, ) + key = "DICOM instance discarded as series has too few instances" + skipped_instance_counts[key] += 1 continue try: diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index e278bae1d..76f0c8b76 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -152,7 +152,7 @@ def anonymise_dicom( # Do before anonymisation in case someone decides to delete the # Series Description or Manufacturer tags as part of anonymisation. 
if _should_exclude_series(dataset, config): - msg = "DICOM instance discarded due to its series description" + msg = "DICOM instance discarded due to its series description or number" raise PixlSkipInstanceError(msg) if _should_exclude_manufacturer(dataset, config): msg = "DICOM instance discarded due to its manufacturer" From 1891df08a523afe7341628355db1b9943bd4e004 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 15 Jan 2025 10:18:39 +0000 Subject: [PATCH 22/44] move get_series_to_skip to dcmd --- orthanc/orthanc-anon/plugin/pixl.py | 28 +------------------------ pixl_dcmd/src/pixl_dcmd/main.py | 32 ++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index af0089896..18d349cb3 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -45,6 +45,7 @@ from pixl_dcmd.dicom_helpers import get_study_info from pixl_dcmd.main import ( anonymise_dicom_and_update_db, + get_series_to_skip, parse_validation_results, write_dataset_to_bytes, ) @@ -388,33 +389,6 @@ def _anonymise_study_instances( return anonymised_instances_bytes, anonymised_study_uid -def get_series_to_skip(zipped_study: ZipFile, min_instances: int) -> set[str]: - """ - Determine which series to skip based on the number of instances in the series. - - If a series has fewer instances than `min_instances`, add it to a set of series to skip. 
- - Args: - zipped_study: ZipFile containing the study - min_instances: Minimum number of instances required to include a series - - """ - if min_instances <= 1: - return set() - - series_instances = {} - for file_info in zipped_study.infolist(): - with zipped_study.open(file_info) as file: - logger.debug("Reading file {}", file) - dataset = dcmread(file) - if dataset.SeriesInstanceUID not in series_instances: - series_instances[dataset.SeriesInstanceUID] = 1 - continue - series_instances[dataset.SeriesInstanceUID] += 1 - - return {series for series, count in series_instances.items() if count < min_instances} - - def _anonymise_dicom_instance(dataset: pydicom.Dataset, config: PixlConfig) -> tuple[bytes, dict]: """Anonymise a DICOM instance.""" validation_errors = anonymise_dicom_and_update_db(dataset, config=config) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index 76f0c8b76..f5c843bfb 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -16,6 +16,7 @@ import typing from functools import lru_cache from io import BytesIO +from zipfile import ZipFile import requests from core.exceptions import PixlSkipInstanceError @@ -26,7 +27,7 @@ anonymize_dataset, ) from loguru import logger -from pydicom import DataElement, Dataset, dcmwrite +from pydicom import DataElement, Dataset, dcmread, dcmwrite from core.project_config.pixl_config_model import PixlConfig from pixl_dcmd._database import ( @@ -56,6 +57,35 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes: return buffer.read() +def get_series_to_skip(zipped_study: ZipFile, min_instances: int) -> set[str]: + """ + Determine which series to skip based on the number of instances in the series. + + If a series has fewer instances than `min_instances`, add it to a set of series to skip. 
+ + Args: + zipped_study: ZipFile containing the study + min_instances: Minimum number of instances required to include a series + + """ + if min_instances <= 1: + return set() + + series_instances = {} + for file_info in zipped_study.infolist(): + with zipped_study.open(file_info) as file: + logger.debug("Reading file {}", file) + dataset = dcmread(file) + if dataset.SeriesInstanceUID not in series_instances: + series_instances[dataset.SeriesInstanceUID] = 1 + continue + series_instances[dataset.SeriesInstanceUID] += 1 + + return { + series for series, count in series_instances.items() if count < min_instances + } + + def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: """ Check whether the dataset series should be exlucded based on its description From 95975590a3caee94171e280ab444ccb13b4e7886 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 15 Jan 2025 10:34:06 +0000 Subject: [PATCH 23/44] Add philips and carestream as allowed manufacturers for test project --- projects/configs/test-extract-uclh-omop-cdm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index a2e097d1a..e2d94c61f 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -25,8 +25,12 @@ tag_operation_files: manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] allowed_manufacturers: + - regex: "^carestream" + exclude_series_numbers: [] - regex: "^company" exclude_series_numbers: [] + - regex: "^philips" + exclude_series_numbers: [] series_filters: - "localizer" From 5ea6418ca41830212b57add3d6b75b77d29fe795 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 15 Jan 2025 10:38:24 +0000 Subject: [PATCH 24/44] Update description of project config in readme --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 487370ad8..638176ef7 100644 --- 
a/README.md +++ b/README.md @@ -152,8 +152,9 @@ The configuration file defines: series with a single screenshot containing patient identifiable data - A list of series description filters (e.g. `['loc', 'pos']`). Series with descriptions matching any of these filters will be skipped -- A list of series number filters (e.g. `[3, 4]`). Series with SeriesNumber matching any of these filters will - be skipped +- A list of allowed manufacturers. By default, no manufacturers are allowed. For each manufacturer: + - A regex to identify the allowed manufacturer (e.g. `^philips`) + - A list of series numbers to exclude for the given manufacturer (e.g. `[3, 4]`) - The [anonymisation operations](/pixl_dcmd/README.md#tag-scheme-anonymisation) to be applied to the DICOM tags, by providing a file path to one or multiple YAML files. We currently allow two types of files: From aa170a6972ef92fad0ff42b9cd9a70a3b1ac8bbe Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 21 Jan 2025 15:06:45 +0000 Subject: [PATCH 25/44] Check _should_exclude_manufacturer before _should_exclude_series in case the manufacturer doesn't exist --- pixl_core/src/core/project_config/pixl_config_model.py | 4 ++-- pixl_dcmd/src/pixl_dcmd/main.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index bf2edfea5..d626360ac 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -179,7 +179,7 @@ def is_series_number_excluded(self, manufacturer: str, series_number: str | None if not self.is_manufacturer_allowed(manufacturer) or series_number is None: return False - exclude_series_numbers = self.get_manufacturer(manufacturer).exclude_series_numbers + exclude_series_numbers = self._get_manufacturer(manufacturer).exclude_series_numbers return any(series_number.find(filt) != -1 for filt in 
exclude_series_numbers) def is_manufacturer_allowed(self, manufacturer: str) -> bool: @@ -194,7 +194,7 @@ def is_manufacturer_allowed(self, manufacturer: str) -> bool: return True return False - def get_manufacturer(self, manufacturer: str) -> Manufacturer: + def _get_manufacturer(self, manufacturer: str) -> Manufacturer: """ Get the manufacturer configuration for the given manufacturer. diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index f5c843bfb..b2685fc5d 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -181,12 +181,12 @@ def anonymise_dicom( # Do before anonymisation in case someone decides to delete the # Series Description or Manufacturer tags as part of anonymisation. - if _should_exclude_series(dataset, config): - msg = "DICOM instance discarded due to its series description or number" - raise PixlSkipInstanceError(msg) if _should_exclude_manufacturer(dataset, config): msg = "DICOM instance discarded due to its manufacturer" raise PixlSkipInstanceError(msg) + if _should_exclude_series(dataset, config): + msg = "DICOM instance discarded due to its series description or number" + raise PixlSkipInstanceError(msg) if dataset.Modality not in config.project.modalities: msg = f"Dropping DICOM Modality: {dataset.Modality}" raise PixlSkipInstanceError(msg) From f1eed497ca321d34c8d14376b5800e0499c28146 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 21 Jan 2025 15:08:27 +0000 Subject: [PATCH 26/44] filter out instance if manufacturer tag is missing --- pixl_dcmd/src/pixl_dcmd/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index b2685fc5d..f0ae909e8 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -113,6 +113,9 @@ def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: def _should_exclude_manufacturer(dataset: Dataset, cfg: PixlConfig) -> bool: manufacturer = 
dataset.get("Manufacturer") + if manufacturer is None: + logger.debug("FILTERING out as manufacturer tag is missing") + should_exclude = not cfg.is_manufacturer_allowed(manufacturer=manufacturer) if should_exclude: logger.debug("FILTERING out manufacturer: {}", manufacturer) From 46d2109a7134d51491c610fb62ddab32990b6a14 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 21 Jan 2025 15:15:15 +0000 Subject: [PATCH 27/44] allow all manufacturers for existing projects --- pixl_dcmd/tests/test_main.py | 2 +- projects/configs/despiad.yaml | 7 ++++++- projects/configs/ms-pinpoint.yaml | 6 ++++++ projects/configs/prognosis-ai.yaml | 6 ++++++ ...sogastric-tube-project-ngt-only-full-dataset.yaml | 7 +++++++ .../configs/uclh-prostate-mri-external-dataset.yaml | 7 +++++++ template_config.yaml | 12 ++++++++++++ 7 files changed, 45 insertions(+), 2 deletions(-) diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 25cc9426d..524a44df5 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -195,7 +195,7 @@ def ids_for_parameterised_test(val: pathlib.Path) -> str: @pytest.mark.parametrize( ("yaml_file"), - PROJECT_CONFIGS_DIR.glob("test-*.yaml"), + PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test, ) def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index a9c19d87f..ef1366e82 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -26,9 +26,14 @@ tag_operation_files: - "despiad.yaml" manufacturer_overrides: [] +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + min_instances_per_series: 1 + series_filters: [] destination: - dicom: "none" + dicom: "xnat" parquet: "none" diff --git a/projects/configs/ms-pinpoint.yaml b/projects/configs/ms-pinpoint.yaml index a68258adf..db78dc463 100644 --- a/projects/configs/ms-pinpoint.yaml +++ b/projects/configs/ms-pinpoint.yaml @@ -23,6 
+23,12 @@ tag_operation_files: - "ms-pinpoint.yaml" manufacturer_overrides: ["mri.yaml"] +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/prognosis-ai.yaml b/projects/configs/prognosis-ai.yaml index f1292856e..8594f119b 100644 --- a/projects/configs/prognosis-ai.yaml +++ b/projects/configs/prognosis-ai.yaml @@ -23,6 +23,12 @@ tag_operation_files: - "ion-neuro-db.yaml" manufacturer_overrides: ["mri.yaml"] +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml index c54eac399..ca0afff29 100644 --- a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml +++ b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml @@ -22,6 +22,13 @@ tag_operation_files: - "base.yaml" #Expected base config file for any project - "xray.yaml" manufacturer_overrides: null + +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + destination: dicom: "ftps" parquet: "ftps" diff --git a/projects/configs/uclh-prostate-mri-external-dataset.yaml b/projects/configs/uclh-prostate-mri-external-dataset.yaml index 326e94c92..a68e09134 100644 --- a/projects/configs/uclh-prostate-mri-external-dataset.yaml +++ b/projects/configs/uclh-prostate-mri-external-dataset.yaml @@ -22,6 +22,13 @@ tag_operation_files: - "mri.yaml" - "diffusion-weighted-mri.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] + +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + destination: dicom: "ftps" parquet: "ftps" diff --git a/template_config.yaml b/template_config.yaml index 223c11fe9..24b411f8b 100644 --- 
a/template_config.yaml +++ b/template_config.yaml @@ -22,6 +22,18 @@ tag_operation_files: # DICOM tag anonymisation operations - "base-tag-operations.yaml" # Base schema manufacturer_overrides: none # Manufactuer-dependendent overrides +allowed_manufacturers: + - regex: ".*" # allow all manufacturers + exclude_series_numbers: [] + +min_instances_per_series: 1 + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + destination: dicom: "ftps" # alternatives: "dicomweb", "xnat", "none" parquet: "ftps" # alternatives: "none" From b6872b917d7239efe587a49ee821e7f0ffce1b22 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 27 Jan 2025 14:54:30 +0000 Subject: [PATCH 28/44] Add tests for PixlConfig.is_manufacturer_allowed and PixlConfig.is_series_number_excluded --- .../core/project_config/pixl_config_model.py | 2 +- .../project_config/test_project_config.py | 39 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index d626360ac..d32418d4b 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -177,7 +177,7 @@ def is_series_number_excluded(self, manufacturer: str, series_number: str | None :returns: True if it should be excluded, False if not """ if not self.is_manufacturer_allowed(manufacturer) or series_number is None: - return False + return True exclude_series_numbers = self._get_manufacturer(manufacturer).exclude_series_numbers return any(series_number.find(filt) != -1 for filt in exclude_series_numbers) diff --git a/pixl_core/tests/project_config/test_project_config.py b/pixl_core/tests/project_config/test_project_config.py index 0a8c6722e..632cf9bfe 100644 --- a/pixl_core/tests/project_config/test_project_config.py +++ b/pixl_core/tests/project_config/test_project_config.py @@ -182,3 +182,42 @@ def 
test_series_filtering(base_yaml_data, series_filters, test_series_desc, expe base_yaml_data["series_filters"] = series_filters cfg = PixlConfig.model_validate(base_yaml_data) assert cfg.is_series_description_excluded(test_series_desc) == expect_exclude + + +@pytest.mark.parametrize( + ("regex", "manufacturer", "allowed"), + [ + ("^allowed", "allowed", True), + ("allowed", "not-allowed", False), + (None, "allowed", False), + ], +) +def test_manufacturer_regex_filtering(base_yaml_data, regex, manufacturer, allowed): + """Check the allowed manufacturers regex works.""" + if regex is not None: + base_yaml_data["allowed_manufacturers"] = [{"regex": "^allowed"}] + cfg = PixlConfig.model_validate(base_yaml_data) + assert cfg.is_manufacturer_allowed(manufacturer) == allowed + + +@pytest.mark.parametrize( + ("manufacturer", "series_number", "expect_exclude"), + [ + ("allowed", "2", True), + ("allowed", "4", False), + ("allowed", None, True), + ("not-allowed", "4", True), + ], +) +def test_manufacturer_series_number_filterings( + base_yaml_data, manufacturer, series_number, expect_exclude +): + """Check the series number are correctly excluded.""" + base_yaml_data["allowed_manufacturers"] = [ + {"regex": "^allowed", "exclude_series_numbers": ["1", "2", "3"]} + ] + cfg = PixlConfig.model_validate(base_yaml_data) + assert ( + cfg.is_series_number_excluded(manufacturer=manufacturer, series_number=series_number) + == expect_exclude + ) From 6b707cb80ee26546f53df09b51c74846f77982b6 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 27 Jan 2025 15:41:46 +0000 Subject: [PATCH 29/44] Add more tests for _should_exclude_series --- .../core/project_config/pixl_config_model.py | 2 +- pixl_dcmd/tests/test_main.py | 61 +++++++++---------- .../configs/test-extract-uclh-omop-cdm.yaml | 2 +- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 
d32418d4b..48ae2b4e0 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -180,7 +180,7 @@ def is_series_number_excluded(self, manufacturer: str, series_number: str | None return True exclude_series_numbers = self._get_manufacturer(manufacturer).exclude_series_numbers - return any(series_number.find(filt) != -1 for filt in exclude_series_numbers) + return any(str(series_number).find(filt) != -1 for filt in exclude_series_numbers) def is_manufacturer_allowed(self, manufacturer: str) -> bool: """ diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 0bbdbe0e1..f1aef4333 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -407,40 +407,39 @@ def test_no_pseudo_patient_id_processing( ) -@pytest.fixture() -def dicom_series_to_keep() -> list[pydicom.Dataset]: - series = [ - "", - "whatever", - ] - return [_make_dicom(s) for s in series] - - -@pytest.fixture() -def dicom_series_to_exclude() -> list[pydicom.Dataset]: - series = [ - "positioning", - "foo_barpositioning", - "positioningla", - "scout", - "localiser", - "localizer", - # Matching should be case insensitive - "lOcALIsER", - ] - return [_make_dicom(s) for s in series] - - -def _make_dicom(series_description) -> pydicom.Dataset: - return generate_dicom_dataset(SeriesDescription=series_description) +def _make_dicom(series_description, manufacturer, series_number) -> pydicom.Dataset: + return generate_dicom_dataset( + SeriesDescription=series_description, + Manufacturer=manufacturer, + SeriesNumber=series_number, + ) -def test_should_exclude_series(dicom_series_to_exclude, dicom_series_to_keep): +@pytest.mark.parametrize( + ("series_description", "manufacturer", "series_number", "expect_exclude"), + [ + ("", "Company", "1", False), + ("whatever", "Company", "1", False), + ("whatever", "Company", None, True), + ("positioning", "Company", "1", True), + ("foo_barpositioning", "Company", 
"1", True), + ("positioningla", "Company", "1", True), + ("scout", "Company", "1", True), + ("localiser", "Company", "1", True), + ("localizer", "Company", "1", True), + ("lOcALIsER", "Company", "1", True), + ("", "DifferentCompany", "1", True), + ("", "Company", "1234567890", True), + ], +) +def test_should_exclude_series( + series_description, manufacturer, series_number, expect_exclude +): config = load_project_config(TEST_PROJECT_SLUG) - for s in dicom_series_to_keep: - assert not _should_exclude_series(s, config) - for s in dicom_series_to_exclude: - assert _should_exclude_series(s, config) + ds = _make_dicom(series_description, manufacturer, series_number) + series_number = ds.get("SeriesNumber") + print(f"{series_number=}", type(series_number), str(series_number)) + assert _should_exclude_series(ds, config) == expect_exclude def test_can_nifti_convert_post_anonymisation( diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index e2d94c61f..f8d5a94a2 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -28,7 +28,7 @@ allowed_manufacturers: - regex: "^carestream" exclude_series_numbers: [] - regex: "^company" - exclude_series_numbers: [] + exclude_series_numbers: ["1234567890"] - regex: "^philips" exclude_series_numbers: [] From a7c5917fb4b8962fd9f12b2cbe1a7ca67ecc03b1 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Mon, 27 Jan 2025 16:11:16 +0000 Subject: [PATCH 30/44] Add tests for test_should_exclude_manufacturer --- pixl_dcmd/src/pixl_dcmd/main.py | 1 + pixl_dcmd/tests/test_main.py | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index f0ae909e8..78c659617 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -115,6 +115,7 @@ def _should_exclude_manufacturer(dataset: Dataset, cfg: 
PixlConfig) -> bool: manufacturer = dataset.get("Manufacturer") if manufacturer is None: logger.debug("FILTERING out as manufacturer tag is missing") + return True should_exclude = not cfg.is_manufacturer_allowed(manufacturer=manufacturer) if should_exclude: diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index f1aef4333..201dc8867 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -43,6 +43,7 @@ anonymise_dicom, _enforce_allowlist, _should_exclude_series, + _should_exclude_manufacturer, ) from pytest_pixl.dicom import generate_dicom_dataset from pytest_pixl.helpers import run_subprocess @@ -407,7 +408,11 @@ def test_no_pseudo_patient_id_processing( ) -def _make_dicom(series_description, manufacturer, series_number) -> pydicom.Dataset: +def _make_dicom( + series_description="mri_sequence", + manufacturer="Company", + series_number="901", +) -> pydicom.Dataset: return generate_dicom_dataset( SeriesDescription=series_description, Manufacturer=manufacturer, @@ -438,10 +443,26 @@ def test_should_exclude_series( config = load_project_config(TEST_PROJECT_SLUG) ds = _make_dicom(series_description, manufacturer, series_number) series_number = ds.get("SeriesNumber") - print(f"{series_number=}", type(series_number), str(series_number)) assert _should_exclude_series(ds, config) == expect_exclude +@pytest.mark.parametrize( + ("manufacturer", "expect_exclude"), + [ + ("Company", False), + ("DifferentCompany", True), + (None, True), + ], +) +def test_should_exclude_manufacturer(manufacturer, expect_exclude): + config = load_project_config(TEST_PROJECT_SLUG) + ds = _make_dicom(manufacturer=manufacturer) + if manufacturer is None: + # the Manufacturer tag is sometimes missing in real data + delattr(ds, "Manufacturer") + assert _should_exclude_manufacturer(ds, config) == expect_exclude + + def test_can_nifti_convert_post_anonymisation( tmp_path, directory_of_mri_dicoms, From 7527b62403c2c1a069781242a5288a3352bf71b9 Mon Sep 17 
00:00:00 2001 From: Paul Smith Date: Tue, 28 Jan 2025 09:14:56 +0000 Subject: [PATCH 31/44] Add tests for get_series_to_skip --- pixl_dcmd/tests/test_main.py | 29 ++++++++++++++++++ .../pytest_pixl/data/dicom-study/study.zip | Bin 0 -> 10037 bytes 2 files changed, 29 insertions(+) create mode 100644 pytest-pixl/src/pytest_pixl/data/dicom-study/study.zip diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 201dc8867..476b716e9 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -13,11 +13,13 @@ # limitations under the License. from __future__ import annotations +from importlib import resources import pathlib import re from pathlib import Path import logging import typing +import zipfile import nibabel import numpy as np @@ -41,6 +43,7 @@ _anonymise_dicom_from_scheme, anonymise_and_validate_dicom, anonymise_dicom, + get_series_to_skip, _enforce_allowlist, _should_exclude_series, _should_exclude_manufacturer, @@ -55,6 +58,32 @@ TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" +@pytest.fixture() +def zipped_dicom_study() -> Path: + """Dummy DICOM study for tests.""" + path = resources.files("pytest_pixl") / "data" / "dicom-study" / "study.zip" + return zipfile.ZipFile(path) + + +@pytest.mark.parametrize( + ("min_instances", "expected_num_series_skipped"), + [ + (1, 0), + (2, 4), + ], +) +def test_get_series_to_skip( + zipped_dicom_study: zipfile.ZipFile, + min_instances: int, + expected_num_series_skipped: int, +): + """ + Check series are skipped if containing too few instances. 
+ """ + series_to_skip = get_series_to_skip(zipped_dicom_study, min_instances) + assert len(series_to_skip) == expected_num_series_skipped + + @pytest.fixture(scope="module") def tag_scheme(test_project_config: PixlConfig) -> list[dict]: """Base tag scheme for testing.""" diff --git a/pytest-pixl/src/pytest_pixl/data/dicom-study/study.zip b/pytest-pixl/src/pytest_pixl/data/dicom-study/study.zip new file mode 100644 index 0000000000000000000000000000000000000000..33d8bd9baa13da513a1c66dbcac9a269e90db9b7 GIT binary patch literal 10037 zcmeI&c{JO5;{Wl^y?1W!OmExiswg^L6{Q-4*v6frYOB4rXelWnsU;#p%zWK0mT-xk z&=IN)vBgg0_Sz~TE*fc&o;BI&b{~do8OuFeZPNv&zXsH{NXs>C!gbd z^2qa*TehEmc6{HyeP8T*Wo6=3)ih06`C{L`sDoeZGm`(*$Pj3tuLroMd*-X)(9m0V zqk;orQ92eDy4L`D`UY2Z&$zg{UH8VUNwr~{>m3==*?45n@ zUuM}Tf2qM-`E!^0?jGwou}99Z&dYO$vHg{QDn*fX5U;Ud;l2t5_0QjIV{*hsk+TD+!32g~=61qhdX1LBSxyVaK;~VB zy{F;X>GSL^@yJ5&M(cUFg6J*~I_H^laDo!eUFuC(RnTW#$HQJPG_>zTCw#N|h!Cy% zxT|XZI={+vK8O>2#M%?QBDH|$t)hpWPqLb(#+NI`(+P%=yDfA6-|HwR(a9z$VA>uA zeF~v{9?J6Smoh7-8w-~`PO=hW$fIh{24{SvtEYBj!ep~C`dF(%8vsiCW{yW+OB5%Numh6Q7J|%lGuKHfNMt&hZ#i5y{GeL2)q;IDe_KP&074%y$m$P?__j{w^ zefsMGVbhpJj_J5j8?u4Y5HEg^fZQWB7xo)7)yLMvp%szMa5Wf@GGV9snsi;=J(%Dy zG!|UBxb-C3AA;vW^tQTHT_W{re@-@ zRJ~uIt{!lP?%f~SD5Zx#V}g`htJGoFNgC#b+i-g8`NS3*{P0z^FQ?O1AMNDNAaN9RI+Fmhcy!bS9rgSOd6W zZu_DnUm3NWf^$_)B$`ZxAODpipo)gBgU)oRi;@7o@=Yyhhf&H|Az$~Kmq90wDfT?N z_9QLfC}*(V3Ouv3kzyaEm#pNe!a`1{aXCsVD$ZE1f-E?ZoR z^^Tt`S}xrfNW-ruTR`dqctjR;n^7-5Yp7E@$*Hh9F zX*RTyo^rr4_Okl}>0tEbu_Y&1y$hc&=pOeWqC+eXxVSC7c%@IA#AO(}@H1bLTq!y2 zobs7Q4W97eIVyl#M#omyOe1IVyS!kc$avq>F5kzDI@puduIl+L3nskoX*h216W6D zR1hxP1>(0gz-G2#8;Q9VlN?xg^hrt;koK!;l`yrkN58+VSbfF^M{s3G2e>3NKuPQw2n@5{fUb9Nl=6 z_u5goya1U)X`AkX`X`R`ko#GJ0usm`TwJDRwqb3Gqk63G*i$`PR=U)g0Yix+$kr zoSae?@{971Qz{r15Q?}LC^y#s52;+`r0V`(NGepAX0>SoamO$jFqe+j{(PrCqs4?y zo-f}PH$Qfo`>H;gl&$4Q0fF0f69U6`9s@x;cgO7*7yQ!Srn7S3sUD;E8}jvVbEWrC zh;8}3WNCAjCWr}#tzrjzls#&+L$zVOa6oLR72bL})JhnrDZ5H<>snJ!cQ3Zl&bd$l 
zQKC{>*8G`$3M4l`zD?WSi-$UO?t|w|bvw1-7j=q27|CmiZABV+CaGR*VL(d`%Dm+0 zW=#RBl7$ey#tEFFwWn^#;n=!N{7`n4>xIZj)bPUEW?(z8@_znaIH1+H#}#1oJhrnw zVeSr8ENYp>RE15ZQ`)VKt=H3K{@;au!Crt&nO!6k9f&CP_Hfc&uR03{wf z777uQB2-DSRft%sgIYr|(Tm5kXTicmqA)cB+`@vnHJfT?W47tyHn;sEn{=++iyb{S zsHSjPa}-dLlI5VdE}2MLRvt}H%IqjyeSGP=vPDCoRbH8HjubYd+r?h+54k2NaV3EB zcNq%ZkahYBS_8D~#1&dQNlvV1A_!i0S~5djxZ>{^;^u@xL6FEm;e7~NtRB`g z>cO2Je(=2*VQi^-!BgeefnVN3Y37sMF-D9AFUYyv6FtgKm9)czzAtJUQtJ%&zB^Cx ztXLPW=pZ&(3bY)CmA2ABKtGvDMxzk0jx5G?0AJ?e0r5Yq9+{#EkgVkD=dfz7R9Rtj ziBiV4okA}xNTw@DSw8w z%H*FAm$YmqYWS^eX6y}~_ggu;HxZ%Te+K3Z(Hq#`)e=Ctn{ZLax#vi$7YKMB*5L?5 zmaAhFqHF3ov!JTG2ZVny3TFj-iGE9Y@~2SpCTTpT&TPvWA4H0{L{wdjP`+(6X+BOp zkse!RU0LLf8q`UP6syj0jTw}LBHL$<+R#=KMi`BP~&6lj6k~|D*#PygNxUI$S ze$}d4Nw2XML5SP6ZPKZS5f5~^9mX1;qq?WnKe-lkJA2)q_3E(`SG;1q<9`Z3s*V@; zA`^ZW5JMX9Yi~O#nvB1-o*TC@G{|nT?(4&~Cj4;cqD(w8qwH48XHGO?GTSI7(N!@t z>6*G~N$Bqad+xsx9{POe?ywj_8Gbq;JP4}WiXuC3Rn|sl7WP(E6yQ&tR?}?H-7Iih zL9}@`7TSC0>LrINwZmGagq2ZKZ7k3oWllf*^`M88p`K}CRn(JjAq^@*UC7brKDEqP zm7aQa2auAK>iwUUOOF^FJbflQ=o{JzQOKg<_WSRTC5V^$}F0(S5U*Ae}XwX+b zU`1%T!2h;g_VgHqWCmlPoZ^D|80em-0CpXGOz4M!+Xo?*41p5`MTLcnXC=|CS$aga ze8tbkzN^VR2 zukbG-Y-ZevU^-{XS?ZN7UVW6|hhK!dc|)0rijI?a-iVK^UwGRvkpG2htIS$45%f1G^5BU61-dj zzyjEd+0HWokT;SzaMNDT}zXeE}lr_UKOvGmOJ=Nq(X;ELd?^q z24vI4bPIF+qq%dIs+6)1vFaX_Rlt_Vssm8q+}y`l)uhC3T@>nWKWyX|9;hVUZ^+QE ze^0Bo%!lP{VWDQ_ffRBj&_DqWro;xmt5wRw0q5S;s&ihBq~l#C;zJXJvtFo+?RhsH zhV(G#a8VC2UB|1^$^ehpb2;GWPg3#}c5g^aGbj%P8$B+4I(QI+e94)Y7?+ict$I~V zOpB$Q4Pp(AYGhB#X=U^iT9L_?Y&or7aJ=0b2KHJqIMx2d3>8XIT zydEH48fFYr2%>Tk`ter#7M#L)#eHE_*OUNh*GF%yy+-j-gSbF)}#d$1iKzUHgT2IYV?G|@0-;vU*l#fb;9do zv&yiy-+AAx);>0?FcnSac+)aW(sJ7~zhKv=D$04L58lR7c1cgdl@_i)zVyijL=kfO zH^Du(M|RXlQY&allIID|*w{dUvET3h!qFKOzu3^SvaC_F&I%heei~)Ku@r zBaVA`SbMJ8c>uN23I-)NeX2$mKs(X+CSv?w6pnlYcc;23>x@DCh@&0(Yo&sh4in$f z-tG?XDodXC#p;btj_6Yj85DF8T5Pz!8?IdlG8)fb4(6N$R@dk>%;qRm_dU_kBT}@Y z>yuuu%MJvay65_(j}N)Z?jra-2ZH6UDmB*FZ}}5fsXzMQs)!G+ntku8EBqge75!bY 
zvU^voB=3q9eg{cfl)LH#bL+jU&c1V1@;g_>{VtII(N%2m+}~Wa>@cjd=FJ}66MAcN zEeL`utYr0I*I`!Dpq41e#qGN%|+GDKA8Cpq_C#zvaOXuz<%Z7kg z+*CkKcYUvMgVr+m$ft^-v$`2hq3VhAh8Kr9cKXTn^L>7yr39zfuh8L5^Vl}aW}XTb zdOQ(BHJ`uko#=X1sppE#r{a@|dQHr}!B%zp(Bd1*n$uCe#%rY5olyl=jQd<#Bw$d| z%9dy!h3DF`@9A$PEjm}(k5`<`&CQvsCTZ({FMH?u|GLi)p6f4C<@Xx9Ww>7&n|0>6 z0KD>^M>tx->S?BmtpGRel*yY+=(tEbu*OXWhM$r#CA;p8jqav-lJ0|#b{u5$>&{JF zyP+=b?pr*GgD(l=X_M;-HOgVt-dWqB zhc4I0`4*op$*&xh_2b0+{6*SK1XK2}=O zVXu$L>W1G50NX0Qhtv;cZ${ zagt^Y@l~S$L~N785HmO~G*#FvSSAfImVJa9!m|6;$A|9uRYDDfdT3M;X5&BAR_%7d zoISviu)2USxTp!tGoPezHiyTSD59~cv6mL(ro2h>(D>NIfEhJlHH$UG(v-}5--|eF zInX=V>tBE-B&Uv1D0srg)Ye${hWEluyNT!z1pmC=jjK$sY=N=Fc*&U$5Zn!USFG+S zs5y*fNTgjjV~*V#MX-@Rl(7AH&KwgRRIa}p4bG$1H+0rwrU@m$KEkK}Hpai~6^Kbq z`1I$p`t7T_@GZHlV*a^gW$%0@Fy_Z(W$*m|S^@3chzp-yE(8w_#KXUJSk&|APT#aR z(SNq?X!GrePV40oUk?f#JCLe1?@eo%76PKtk&NRf#>`8 zSlb&k|8w7Ag%BLo>dlR3nS6TSsb@+@V5xpLq|w8w(d6jjKz1z$Iwyi{_rYgMl8$oJ zhd;1NC})+4_G8cCq*6~gtJ*|otLr=F`EphX*Gutd1x=+(G8a;x+e}eKdt zUO5fywT)x}J>2_JB0svSt+usAkvceErx>W3PF5u4l4iQJgmPDzc%iggZ$B$44&GiX zUFw;6sIg9WC*}{F=j^%eQE$Hp$}B$Rn*+q&0@ zWd#(NJjuqk*CECC=jL~Z*GATMx^_4_NN(6gu^Zv`1hpN{;Iu}z#EZcI)a~Y@6Scop zM#MB7@+LvlnlT$oLZ)GL@1hrfacm4Y3%{PugJu&Zcl$-0Eo2kn#>Z&IiG@JK_z%%) zoZK9#7k3qB!kNO?j{C>_|3qxgI|o;dv6 zWp>xUo=`n_o?5j&CZ7b`qCKa(jy0MSr{q|a_&H8l>P%5D&t-o3o>p-mXa&jSIWWq! 
zDns_F+mgqbZqBB|@-eXT!x;GB*&(X$<6wlwivRw5g-kfJR}-;RiO6fD;ogmb0UyS| zXcIx5Dk%aMh%Zs=?%KRCVhgJF(c$!QTy7V(}|>7$zTX`$}afkDCfl*mxx;@0-V z2sy2I$sG(-`Yv}@?^yfqE{lJJLeJdSJm5+EWXIe#EgArBzd@& zrSj!wS1E5OmSEA$QjVHhSqQ>+7%x`zllO$i+T1@71S+|`+QIJaHOC%%q6+Li(CQA& zdRUicADPo8TQRp+xw4~#KlgH0h=LrqP(8;u3;Cu)#i*vQzE`Zud)W3#&cW3xK;vt||iu30U0zi(DXb#SJ**CcneY&v771au=GsqPbdkuf* zdIEjXAFHtz)y;6NJMXXQR>KcQUPh6V*JHb$)HpDjiKIrs%hXG-tBc6wK5A~}s^DOU zd<;a&$H0mSWb(%`u*2=~euSZs@KLzG{dXHrY-!yUnKp+1#~65i?xqR@=+K!9!9DDS z_?eLVX)h*$=DF=&&2{dp*PgUbU@3S@9U(U2Pf+WL{R`V`l!?d2`>l<)(BytA-)Z-0 zuWP8GoMY-#Z`w(NoLPnHwA_=kqtXMjQ8czrnTaoH(|Kk6DGW)}|Bd{H^6b#i*xOc6 zYC6_N*1&KyHy?^zkg#mHX}%_faU^Xvv1_K$W<%0NWdUT~h`ph1SjsUd8VZbu?mbv{ z*z6*4wk8(d$qE(QNlD1VM9XDWqF2LOofBrqY+o74(?AaNW@CFImIgy0W$WwFX;Wyi zw+jwljuT6zpDFRXB2v?7Bbw&&+k#PxLcE>1#>DnYA-TWD8XF7gSeu`*0T#wJx9xV> zmTd2qI~JGt+FbHuOv@gYuSzzt3ENfw9s~bnStWns-yVB62L9@kef$3YC#!ybeg5%r z@E4!6`sFddTpj;*jO}m!2R6uG9{0-@_qXH9j{Y;p{k<~x%kzFI)W4m#tNah2_xHVe t%l6lwzx$J)$-f$ZwQt|;3;XsR-S@|cz`Id__s;mE&NxOy-M Date: Tue, 28 Jan 2025 11:31:35 +0000 Subject: [PATCH 32/44] Don't allow all manufacturers in the template config --- template_config.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/template_config.yaml b/template_config.yaml index 24b411f8b..690c7ab83 100644 --- a/template_config.yaml +++ b/template_config.yaml @@ -20,11 +20,15 @@ project: tag_operation_files: # DICOM tag anonymisation operations base: - "base-tag-operations.yaml" # Base schema - manufacturer_overrides: none # Manufactuer-dependendent overrides + manufacturer_overrides: [] # Manufactuer-dependendent overrides allowed_manufacturers: - - regex: ".*" # allow all manufacturers + - regex: "^example-manufacturer" exclude_series_numbers: [] + # For DICOM generated by Canon, Series "8000" always has bunred-in data + # so we always exlcude this series + - regex: "^canon" + exclude_series_numbers: ["8000"] min_instances_per_series: 1 From 48b3fd7c8d236f5ed5a242ae0b16bb81221a524e Mon Sep 
17 00:00:00 2001 From: Paul Smith Date: Tue, 28 Jan 2025 11:32:16 +0000 Subject: [PATCH 33/44] Set min_instances to 2 for despiad --- projects/configs/despiad.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index ef1366e82..4bd4a4926 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -30,7 +30,7 @@ allowed_manufacturers: - regex: ".*" exclude_series_numbers: [] -min_instances_per_series: 1 +min_instances_per_series: 2 series_filters: [] From 332e64890b3d392a503091fff5f91291c8160a18 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 29 Jan 2025 09:41:12 +0000 Subject: [PATCH 34/44] Only allow manufacturer GE MEDICAL SYSTEMS for DESPIAD --- projects/configs/despiad.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 4bd4a4926..09865b6bf 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -27,7 +27,7 @@ tag_operation_files: manufacturer_overrides: [] allowed_manufacturers: - - regex: ".*" + - regex: "^ge medical systems" exclude_series_numbers: [] min_instances_per_series: 2 From ee5eb8727717ae065583bb31a1f00eeb822a0423 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Wed, 29 Jan 2025 11:44:23 +0000 Subject: [PATCH 35/44] Keep Number of Time Slices attribute for PET --- projects/configs/tag-operations/pet.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/configs/tag-operations/pet.yaml b/projects/configs/tag-operations/pet.yaml index 151399e5d..17065eec0 100644 --- a/projects/configs/tag-operations/pet.yaml +++ b/projects/configs/tag-operations/pet.yaml @@ -112,6 +112,10 @@ group: 0x0054 element: 0x0081 op: keep +- name: Number of Time Slices + group: 0x0054 + element: 0x0101 + op: keep - name: Radionuclide Code Sequence group: 0x0054 element: 0x0300 From 2084bdb706bc9f1b1d15e46531947f2447986488 Mon Sep 17 00:00:00 2001 
From: Paul Smith Date: Thu, 30 Jan 2025 08:40:58 +0000 Subject: [PATCH 36/44] set 'pydicom.config.convert_wrong_length_to_UN = True' in dcmd --- pixl_dcmd/src/pixl_dcmd/main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index 78c659617..808e82862 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -28,6 +28,7 @@ ) from loguru import logger from pydicom import DataElement, Dataset, dcmread, dcmwrite +import pydicom from core.project_config.pixl_config_model import PixlConfig from pixl_dcmd._database import ( @@ -44,6 +45,10 @@ from pixl_dcmd.dicom_helpers import StudyInfo +# See: https://github.com/pydicom/pydicom/issues/2170 +pydicom.config.convert_wrong_length_to_UN = True + + def write_dataset_to_bytes(dataset: Dataset) -> bytes: """ Write pydicom DICOM dataset to byte array From fadc6951b7f01aad2895bc96036e64b49813db4d Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 30 Jan 2025 09:01:09 +0000 Subject: [PATCH 37/44] Add series filters to despiad config --- projects/configs/despiad.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 09865b6bf..cad608a3f 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -32,7 +32,11 @@ allowed_manufacturers: min_instances_per_series: 2 -series_filters: [] +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" destination: dicom: "xnat" From c1bfc73fc8dd383fd28e202b00d075906aa22ffe Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 30 Jan 2025 11:49:53 +0000 Subject: [PATCH 38/44] Add series number and description filers for despiad --- projects/configs/despiad.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index cad608a3f..6564b6c36 100644 --- a/projects/configs/despiad.yaml 
+++ b/projects/configs/despiad.yaml @@ -28,7 +28,13 @@ tag_operation_files: allowed_manufacturers: - regex: "^ge medical systems" - exclude_series_numbers: [] + exclude_series_numbers: + - "1200" + - "1201" + - "1202" + - "1203" + - "1301" + - "1501" min_instances_per_series: 2 @@ -37,6 +43,7 @@ series_filters: - "localiser" - "scout" - "positioning" + - "mip" destination: dicom: "xnat" From ad911256d6050828e6ea5e4c34d8abc05a7be4ff Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 30 Jan 2025 12:34:13 +0000 Subject: [PATCH 39/44] Use ints for series numbers to exclude --- .../core/project_config/pixl_config_model.py | 4 ++-- pixl_dcmd/tests/test_main.py | 23 +++++++++---------- projects/configs/despiad.yaml | 12 +++++----- .../configs/test-extract-uclh-omop-cdm.yaml | 3 ++- template_config.yaml | 12 +++++++++- 5 files changed, 32 insertions(+), 22 deletions(-) diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 48ae2b4e0..23d9988a1 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -69,7 +69,7 @@ class Manufacturer(BaseModel): """ regex: str = "no manufacturers allowed ^" - exclude_series_numbers: list[str] = [] + exclude_series_numbers: list[int] = [] class TagOperationFiles(BaseModel): @@ -180,7 +180,7 @@ def is_series_number_excluded(self, manufacturer: str, series_number: str | None return True exclude_series_numbers = self._get_manufacturer(manufacturer).exclude_series_numbers - return any(str(series_number).find(filt) != -1 for filt in exclude_series_numbers) + return series_number in exclude_series_numbers def is_manufacturer_allowed(self, manufacturer: str) -> bool: """ diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 476b716e9..178bd3840 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -452,18 +452,18 @@ def _make_dicom( 
@pytest.mark.parametrize( ("series_description", "manufacturer", "series_number", "expect_exclude"), [ - ("", "Company", "1", False), - ("whatever", "Company", "1", False), + ("", "Company", 1, False), + ("whatever", "Company", 1, False), ("whatever", "Company", None, True), - ("positioning", "Company", "1", True), - ("foo_barpositioning", "Company", "1", True), - ("positioningla", "Company", "1", True), - ("scout", "Company", "1", True), - ("localiser", "Company", "1", True), - ("localizer", "Company", "1", True), - ("lOcALIsER", "Company", "1", True), - ("", "DifferentCompany", "1", True), - ("", "Company", "1234567890", True), + ("positioning", "Company", 1, True), + ("foo_barpositioning", "Company", 1, True), + ("positioningla", "Company", 1, True), + ("scout", "Company", 1, True), + ("localiser", "Company", 1, True), + ("localizer", "Company", 1, True), + ("lOcALIsER", "Company", 1, True), + ("", "DifferentCompany", 1, True), + ("", "Company", 123456789, True), ], ) def test_should_exclude_series( @@ -471,7 +471,6 @@ def test_should_exclude_series( ): config = load_project_config(TEST_PROJECT_SLUG) ds = _make_dicom(series_description, manufacturer, series_number) - series_number = ds.get("SeriesNumber") assert _should_exclude_series(ds, config) == expect_exclude diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml index 6564b6c36..b1f5be5e7 100644 --- a/projects/configs/despiad.yaml +++ b/projects/configs/despiad.yaml @@ -29,12 +29,12 @@ tag_operation_files: allowed_manufacturers: - regex: "^ge medical systems" exclude_series_numbers: - - "1200" - - "1201" - - "1202" - - "1203" - - "1301" - - "1501" + - 1200 + - 1201 + - 1202 + - 1203 + - 1301 + - 1501 min_instances_per_series: 2 diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index f8d5a94a2..4e7f9ec1b 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ 
-28,7 +28,8 @@ allowed_manufacturers: - regex: "^carestream" exclude_series_numbers: [] - regex: "^company" - exclude_series_numbers: ["1234567890"] + exclude_series_numbers: + - 123456789 - regex: "^philips" exclude_series_numbers: [] diff --git a/template_config.yaml b/template_config.yaml index 690c7ab83..bb9708d48 100644 --- a/template_config.yaml +++ b/template_config.yaml @@ -28,7 +28,16 @@ allowed_manufacturers: # For DICOM generated by Canon, Series "8000" always has bunred-in data # so we always exlcude this series - regex: "^canon" - exclude_series_numbers: ["8000"] + exclude_series_numbers: + - 8000 + - regex: "^ge medical systems" + exclude_series_numbers: + - 1200 + - 1201 + - 1202 + - 1203 + - 1301 + - 1501 min_instances_per_series: 1 @@ -37,6 +46,7 @@ series_filters: - "localiser" - "scout" - "positioning" + - "mip" destination: dicom: "ftps" # alternatives: "dicomweb", "xnat", "none" From fe18df17a2185defd7c59798b276374469986cc7 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 30 Jan 2025 15:51:12 +0000 Subject: [PATCH 40/44] Update default config to exclude series with mip in their description --- template_config.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/template_config.yaml b/template_config.yaml index bb9708d48..87ac3e003 100644 --- a/template_config.yaml +++ b/template_config.yaml @@ -39,14 +39,15 @@ allowed_manufacturers: - 1301 - 1501 -min_instances_per_series: 1 +# Filter out any series with a single instance (e.g. 
PACS reports, screenshots) +min_instances_per_series: 2 series_filters: - "localizer" - "localiser" - "scout" - "positioning" - - "mip" + - "mip" # for PET studies these series have PID destination: dicom: "ftps" # alternatives: "dicomweb", "xnat", "none" From 9b749e6566b778a618fa05d68269ee24e6ba582e Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 4 Feb 2025 15:43:39 +0000 Subject: [PATCH 41/44] Add ^company as an allowed manufacturer when testing anonymisation --- pixl_dcmd/tests/test_main.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 178bd3840..248eae5d0 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -34,7 +34,7 @@ ) from core.exceptions import PixlDiscardError, PixlSkipInstanceError from core.project_config import load_project_config, load_tag_operations -from core.project_config.pixl_config_model import load_config_and_validate +from core.project_config.pixl_config_model import load_config_and_validate, Manufacturer from decouple import config from pixl_dcmd.dicom_helpers import get_study_info @@ -218,6 +218,11 @@ def test_anonymise_and_validate_as_external_user( assert dataset != pydicom.dcmread(dataset_path) +@pytest.fixture +def dummy_manufacturer() -> Manufacturer: + return Manufacturer(regex="^company", exclude_series_numbers=[]) + + def ids_for_parameterised_test(val: pathlib.Path) -> str: """Generate test ID for parameterised tests""" return str(val.stem) @@ -228,7 +233,9 @@ def ids_for_parameterised_test(val: pathlib.Path) -> str: PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test, ) -def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: +def test_anonymise_and_validate_dicom( + caplog, request, yaml_file, dummy_manufacturer +) -> None: """ Test whether anonymisation and validation works as expected on a vanilla DICOM dataset GIVEN a project configuration with tag operations 
that creates a DICOM dataset @@ -237,6 +244,8 @@ def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: """ caplog.set_level(logging.WARNING) config = load_project_config(yaml_file.stem) + if dummy_manufacturer not in config.allowed_manufacturers: + config.allowed_manufacturers.append(dummy_manufacturer) for modality in config.project.modalities: caplog.clear() dicom_image = generate_dicom_dataset(Modality=modality) From eb4ee958d06581739e3070201b9f03c0bcddaa4e Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 4 Feb 2025 16:02:42 +0000 Subject: [PATCH 42/44] Use integers for series_number in tests --- pixl_core/tests/project_config/test_project_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pixl_core/tests/project_config/test_project_config.py b/pixl_core/tests/project_config/test_project_config.py index 632cf9bfe..15cb5c58b 100644 --- a/pixl_core/tests/project_config/test_project_config.py +++ b/pixl_core/tests/project_config/test_project_config.py @@ -203,10 +203,10 @@ def test_manufacturer_regex_filtering(base_yaml_data, regex, manufacturer, allow @pytest.mark.parametrize( ("manufacturer", "series_number", "expect_exclude"), [ - ("allowed", "2", True), - ("allowed", "4", False), + ("allowed", 2, True), + ("allowed", 4, False), ("allowed", None, True), - ("not-allowed", "4", True), + ("not-allowed", 4, True), ], ) def test_manufacturer_series_number_filterings( @@ -214,7 +214,7 @@ def test_manufacturer_series_number_filterings( ): """Check the series number are correctly excluded.""" base_yaml_data["allowed_manufacturers"] = [ - {"regex": "^allowed", "exclude_series_numbers": ["1", "2", "3"]} + {"regex": "^allowed", "exclude_series_numbers": [1, 2, 3]} ] cfg = PixlConfig.model_validate(base_yaml_data) assert ( From ecb04cee7f23c16e92fea6ede23cdf4ae1f976f4 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 4 Feb 2025 16:47:16 +0000 Subject: [PATCH 43/44] set min_instances_per_series to 2 by default --- 
README.md | 2 +- pixl_core/src/core/project_config/pixl_config_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index efac2b079..73c00b6a8 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ The configuration file defines: - Project name: the `` name of the Project - The DICOM dataset modalities to retain (e.g. `["DX", "CR"]` for X-Ray studies) -- The minimum number of instances required by a series (defaults to 1). May be set higher than 1 to filter out +- The minimum number of instances required by a series (defaults to 2). Can be set higher than 1 to filter out series with a single screenshot containing patient identifiable data - A list of series description filters (e.g. `['loc', 'pos']`). Series with descriptions matching any of these filters will be skipped diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 23d9988a1..f035d3dac 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -144,7 +144,7 @@ class PixlConfig(BaseModel): """Project-specific configuration for Pixl.""" project: _Project - min_instances_per_series: Optional[int] = 1 + min_instances_per_series: Optional[int] = 2 series_filters: Optional[list[str]] = [] # pydantic makes a deep copy of the empty default list allowed_manufacturers: list[Manufacturer] = [Manufacturer()] tag_operation_files: TagOperationFiles From caf101a72fd29e1a98273571102110fa568bf034 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 6 Feb 2025 09:41:13 +0000 Subject: [PATCH 44/44] Set min_instances_per_series to 1 for testing --- projects/configs/test-external-user.yaml | 2 ++ projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml | 2 ++ projects/configs/test-extract-uclh-omop-cdm-xnat.yaml | 2 ++ projects/configs/test-extract-uclh-omop-cdm.yaml | 2 ++ projects/configs/test-mr-spectroscopy.yaml | 2 ++ 
projects/configs/test-radiotherapy.yaml | 2 ++ 6 files changed, 12 insertions(+) diff --git a/projects/configs/test-external-user.yaml b/projects/configs/test-external-user.yaml index c710b2adb..ba567b819 100644 --- a/projects/configs/test-external-user.yaml +++ b/projects/configs/test-external-user.yaml @@ -29,6 +29,8 @@ allowed_manufacturers: - regex: "^company" exclude_series_numbers: [] +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml index 7b8fdc884..ef475f85c 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml @@ -24,6 +24,8 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] +min_instances_per_series: 1 + allowed_manufacturers: - regex: "^company" exclude_series_numbers: [] diff --git a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml index d452ad665..ae1967b61 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml @@ -28,6 +28,8 @@ allowed_manufacturers: - regex: "^company" exclude_series_numbers: [] +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index 4e7f9ec1b..d211efff4 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -33,6 +33,8 @@ allowed_manufacturers: - regex: "^philips" exclude_series_numbers: [] +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-mr-spectroscopy.yaml b/projects/configs/test-mr-spectroscopy.yaml index 9d38d47ac..ced7ab1bf 100644 --- 
a/projects/configs/test-mr-spectroscopy.yaml +++ b/projects/configs/test-mr-spectroscopy.yaml @@ -28,6 +28,8 @@ allowed_manufacturers: - regex: "^company" exclude_series_numbers: [] +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-radiotherapy.yaml b/projects/configs/test-radiotherapy.yaml index ea31c4257..ba06dc3f1 100644 --- a/projects/configs/test-radiotherapy.yaml +++ b/projects/configs/test-radiotherapy.yaml @@ -32,6 +32,8 @@ allowed_manufacturers: - regex: "^company" exclude_series_numbers: [] +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser"