diff --git a/README.md b/README.md index ce08560e9..73c00b6a8 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,13 @@ The configuration file defines: - Project name: the `` name of the Project - The DICOM dataset modalities to retain (e.g. `["DX", "CR"]` for X-Ray studies) +- The minimum number of instances required by a series (defaults to 2). Can be set higher than 1 to filter out + series with a single screenshot containing patient identifiable data +- A list of series description filters (e.g. `['loc', 'pos']`). Series with descriptions matching any of these + filters will be skipped +- A list of allowed manufacturers. By default, no manufacturers are allowed. For each manufacturer: + - A regex to identify the allowed manufacturer (e.g. `^philips`) + - A list of series numbers to exclude for the given manufacturer (e.g. `[3, 4]`) - The [anonymisation operations](/pixl_dcmd/README.md#tag-scheme-anonymisation) to be applied to the DICOM tags, by providing a file path to one or multiple YAML files. We currently allow two types of files: diff --git a/cli/src/pixl_cli/_config.py b/cli/src/pixl_cli/_config.py index 7a4f578cc..8337f7578 100644 --- a/cli/src/pixl_cli/_config.py +++ b/cli/src/pixl_cli/_config.py @@ -35,7 +35,7 @@ "password": config("PIXL_DB_PASSWORD"), "database": config("PIXL_DB_NAME"), }, -} # type: dict +} class APIConfig: diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index e879bc168..18d349cb3 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -45,6 +45,7 @@ from pixl_dcmd.dicom_helpers import get_study_info from pixl_dcmd.main import ( anonymise_dicom_and_update_db, + get_series_to_skip, parse_validation_results, write_dataset_to_bytes, ) @@ -331,6 +332,7 @@ def _anonymise_study_instances( Return a list of the bytes of anonymised instances, and the anonymised StudyInstanceUID. 
""" config = load_project_config(project_name) + series_to_skip = get_series_to_skip(zipped_study, config.min_instances_per_series) anonymised_instances_bytes = [] skipped_instance_counts = defaultdict(int) dicom_validation_errors = {} @@ -339,6 +341,17 @@ def _anonymise_study_instances( with zipped_study.open(file_info) as file: logger.debug("Reading file {}", file) dataset = dcmread(file) + + if dataset.SeriesInstanceUID in series_to_skip: + logger.debug( + "Skipping series {} for study {} due to too few instances", + dataset.SeriesInstanceUID, + study_info, + ) + key = "DICOM instance discarded as series has too few instances" + skipped_instance_counts[key] += 1 + continue + try: anonymised_instance, instance_validation_errors = _anonymise_dicom_instance( dataset, config diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 407daee61..f035d3dac 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -16,6 +16,7 @@ from __future__ import annotations +import re from enum import Enum from pathlib import Path from typing import Any, Optional @@ -61,6 +62,16 @@ class _Project(BaseModel): modalities: list[str] +class Manufacturer(BaseModel): + """ + An allowed manufacturer for a project. + Also defines which series numbers to exclude for this manufacturer. + """ + + regex: str = "no manufacturers allowed ^" + exclude_series_numbers: list[int] = [] + + class TagOperationFiles(BaseModel): """Tag operations files for a project. 
At least a base file is required.""" @@ -133,20 +144,66 @@ class PixlConfig(BaseModel): """Project-specific configuration for Pixl.""" project: _Project - series_filters: Optional[list[str]] = None + min_instances_per_series: Optional[int] = 2 + series_filters: Optional[list[str]] = [] # pydantic makes a deep copy of the empty default list + allowed_manufacturers: list[Manufacturer] = [Manufacturer()] tag_operation_files: TagOperationFiles destination: _Destination - def is_series_excluded(self, series_description: str) -> bool: + def is_series_description_excluded(self, series_description: str | None) -> bool: """ - Return whether this config excludes the series with the given description + Return whether this config excludes the series with the given description. + + Do a simple case-insensitive substring check - this data is ultimately typed by a human, and + different image sources may have different conventions for case conversion. + :param series_description: the series description to test :returns: True if it should be excluded, False if not """ - if self.series_filters is None or series_description is None: + if not self.series_filters or series_description is None: return False - # Do a simple case-insensitive substring check - this data is ultimately typed by a human, - # and different image sources may have different conventions for case conversion. + return any( series_description.upper().find(filt.upper()) != -1 for filt in self.series_filters ) + + def is_series_number_excluded(self, manufacturer: str, series_number: str | None) -> bool: + """ + Return whether this config excludes the series with the given number for the given + manufacturer. 
+ + :param manufacturer: the manufacturer to test + :param series_number: the series number to test + :returns: True if it should be excluded, False if not + """ + if not self.is_manufacturer_allowed(manufacturer) or series_number is None: + return True + + exclude_series_numbers = self._get_manufacturer(manufacturer).exclude_series_numbers + return series_number in exclude_series_numbers + + def is_manufacturer_allowed(self, manufacturer: str) -> bool: + """ + Check whether the manufacturer is in the allow-list. + + :param manufacturer: name of the manufacturer + :returns: True if the manufacturer is allowed, False if not + """ + for manufacturer_config in self.allowed_manufacturers: + if re.search(rf"{manufacturer_config.regex}", manufacturer, flags=re.IGNORECASE): + return True + return False + + def _get_manufacturer(self, manufacturer: str) -> Manufacturer: + """ + Get the manufacturer configuration for the given manufacturer. + + :param manufacturer: name of the manufacturer + :returns: Manufacturer configuration + :raises: ValueError: if the manufacturer is not allowed + """ + for manufacturer_config in self.allowed_manufacturers: + if re.search(rf"{manufacturer_config.regex}", manufacturer, flags=re.IGNORECASE): + return manufacturer_config + msg = f"Manufacturer {manufacturer} is not allowed by project {self.project.name}" + raise ValueError(msg) diff --git a/pixl_core/tests/project_config/test_project_config.py b/pixl_core/tests/project_config/test_project_config.py index 56ae5964d..15cb5c58b 100644 --- a/pixl_core/tests/project_config/test_project_config.py +++ b/pixl_core/tests/project_config/test_project_config.py @@ -181,4 +181,43 @@ def test_series_filtering(base_yaml_data, series_filters, test_series_desc, expe if series_filters is not None: base_yaml_data["series_filters"] = series_filters cfg = PixlConfig.model_validate(base_yaml_data) - assert cfg.is_series_excluded(test_series_desc) == expect_exclude + assert 
cfg.is_series_description_excluded(test_series_desc) == expect_exclude + + +@pytest.mark.parametrize( + ("regex", "manufacturer", "allowed"), + [ + ("^allowed", "allowed", True), + ("allowed", "not-allowed", False), + (None, "allowed", False), + ], +) +def test_manufacturer_regex_filtering(base_yaml_data, regex, manufacturer, allowed): + """Check the allowed manufacturers regex works.""" + if regex is not None: + base_yaml_data["allowed_manufacturers"] = [{"regex": "^allowed"}] + cfg = PixlConfig.model_validate(base_yaml_data) + assert cfg.is_manufacturer_allowed(manufacturer) == allowed + + +@pytest.mark.parametrize( + ("manufacturer", "series_number", "expect_exclude"), + [ + ("allowed", 2, True), + ("allowed", 4, False), + ("allowed", None, True), + ("not-allowed", 4, True), + ], +) +def test_manufacturer_series_number_filterings( + base_yaml_data, manufacturer, series_number, expect_exclude +): + """Check the series number are correctly excluded.""" + base_yaml_data["allowed_manufacturers"] = [ + {"regex": "^allowed", "exclude_series_numbers": [1, 2, 3]} + ] + cfg = PixlConfig.model_validate(base_yaml_data) + assert ( + cfg.is_series_number_excluded(manufacturer=manufacturer, series_number=series_number) + == expect_exclude + ) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index 7d0015506..808e82862 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -16,6 +16,7 @@ import typing from functools import lru_cache from io import BytesIO +from zipfile import ZipFile import requests from core.exceptions import PixlSkipInstanceError @@ -26,7 +27,8 @@ anonymize_dataset, ) from loguru import logger -from pydicom import DataElement, Dataset, dcmwrite +from pydicom import DataElement, Dataset, dcmread, dcmwrite +import pydicom from core.project_config.pixl_config_model import PixlConfig from pixl_dcmd._database import ( @@ -43,6 +45,10 @@ from pixl_dcmd.dicom_helpers import StudyInfo +# See: 
https://github.com/pydicom/pydicom/issues/2170 +pydicom.config.convert_wrong_length_to_UN = True + + def write_dataset_to_bytes(dataset: Dataset) -> bytes: """ Write pydicom DICOM dataset to byte array @@ -56,14 +62,72 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes: return buffer.read() +def get_series_to_skip(zipped_study: ZipFile, min_instances: int) -> set[str]: + """ + Determine which series to skip based on the number of instances in the series. + + If a series has fewer instances than `min_instances`, add it to a set of series to skip. + + Args: + zipped_study: ZipFile containing the study + min_instances: Minimum number of instances required to include a series + + """ + if min_instances <= 1: + return set() + + series_instances = {} + for file_info in zipped_study.infolist(): + with zipped_study.open(file_info) as file: + logger.debug("Reading file {}", file) + dataset = dcmread(file) + if dataset.SeriesInstanceUID not in series_instances: + series_instances[dataset.SeriesInstanceUID] = 1 + continue + series_instances[dataset.SeriesInstanceUID] += 1 + + return { + series for series, count in series_instances.items() if count < min_instances + } + + def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: + """ + Check whether the dataset series should be excluded based on its description + and number. 
+ """ series_description = dataset.get("SeriesDescription") - if cfg.is_series_excluded(series_description): + if cfg.is_series_description_excluded(series_description): logger.debug("FILTERING OUT series description: {}", series_description) return True + + manufacturer = dataset.get("Manufacturer") + series_number = dataset.get("SeriesNumber") + if cfg.is_series_number_excluded( + manufacturer=manufacturer, series_number=series_number + ): + logger.debug( + "FILTERING OUT series number: {} for manufacturer: {}", + series_number, + manufacturer, + ) + return True + return False +def _should_exclude_manufacturer(dataset: Dataset, cfg: PixlConfig) -> bool: + manufacturer = dataset.get("Manufacturer") + if manufacturer is None: + logger.debug("FILTERING out as manufacturer tag is missing") + return True + + should_exclude = not cfg.is_manufacturer_allowed(manufacturer=manufacturer) + if should_exclude: + logger.debug("FILTERING out manufacturer: {}", manufacturer) + return should_exclude + + def anonymise_dicom_and_update_db( dataset: Dataset, *, @@ -125,9 +189,12 @@ def anonymise_dicom( ) # Do before anonymisation in case someone decides to delete the - # Series Description tag as part of anonymisation. + # Series Description or Manufacturer tags as part of anonymisation. + if _should_exclude_manufacturer(dataset, config): + msg = "DICOM instance discarded due to its manufacturer" + raise PixlSkipInstanceError(msg) if _should_exclude_series(dataset, config): - msg = "DICOM instance discarded due to its series description" + msg = "DICOM instance discarded due to its series description or number" raise PixlSkipInstanceError(msg) if dataset.Modality not in config.project.modalities: msg = f"Dropping DICOM Modality: {dataset.Modality}" diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 39aa92bcc..248eae5d0 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -13,11 +13,13 @@ # limitations under the License. 
from __future__ import annotations +from importlib import resources import pathlib import re from pathlib import Path import logging import typing +import zipfile import nibabel import numpy as np @@ -32,7 +34,7 @@ ) from core.exceptions import PixlDiscardError, PixlSkipInstanceError from core.project_config import load_project_config, load_tag_operations -from core.project_config.pixl_config_model import load_config_and_validate +from core.project_config.pixl_config_model import load_config_and_validate, Manufacturer from decouple import config from pixl_dcmd.dicom_helpers import get_study_info @@ -41,8 +43,10 @@ _anonymise_dicom_from_scheme, anonymise_and_validate_dicom, anonymise_dicom, + get_series_to_skip, _enforce_allowlist, _should_exclude_series, + _should_exclude_manufacturer, ) from pytest_pixl.dicom import generate_dicom_dataset from pytest_pixl.helpers import run_subprocess @@ -54,6 +58,32 @@ TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" +@pytest.fixture() +def zipped_dicom_study() -> Path: + """Dummy DICOM study for tests.""" + path = resources.files("pytest_pixl") / "data" / "dicom-study" / "study.zip" + return zipfile.ZipFile(path) + + +@pytest.mark.parametrize( + ("min_instances", "expected_num_series_skipped"), + [ + (1, 0), + (2, 4), + ], +) +def test_get_series_to_skip( + zipped_dicom_study: zipfile.ZipFile, + min_instances: int, + expected_num_series_skipped: int, +): + """ + Check series are skipped if containing too few instances. 
+ """ + series_to_skip = get_series_to_skip(zipped_dicom_study, min_instances) + assert len(series_to_skip) == expected_num_series_skipped + + @pytest.fixture(scope="module") def tag_scheme(test_project_config: PixlConfig) -> list[dict]: """Base tag scheme for testing.""" @@ -188,15 +218,24 @@ def test_anonymise_and_validate_as_external_user( assert dataset != pydicom.dcmread(dataset_path) +@pytest.fixture +def dummy_manufacturer() -> Manufacturer: + return Manufacturer(regex="^company", exclude_series_numbers=[]) + + def ids_for_parameterised_test(val: pathlib.Path) -> str: """Generate test ID for parameterised tests""" return str(val.stem) @pytest.mark.parametrize( - ("yaml_file"), PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test + ("yaml_file"), + PROJECT_CONFIGS_DIR.glob("*.yaml"), + ids=ids_for_parameterised_test, ) -def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: +def test_anonymise_and_validate_dicom( + caplog, request, yaml_file, dummy_manufacturer +) -> None: """ Test whether anonymisation and validation works as expected on a vanilla DICOM dataset GIVEN a project configuration with tag operations that creates a DICOM dataset @@ -205,6 +244,8 @@ def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: """ caplog.set_level(logging.WARNING) config = load_project_config(yaml_file.stem) + if dummy_manufacturer not in config.allowed_manufacturers: + config.allowed_manufacturers.append(dummy_manufacturer) for modality in config.project.modalities: caplog.clear() dicom_image = generate_dicom_dataset(Modality=modality) @@ -405,40 +446,58 @@ def test_no_pseudo_patient_id_processing( ) -@pytest.fixture() -def dicom_series_to_keep() -> list[pydicom.Dataset]: - series = [ - "", - "whatever", - ] - return [_make_dicom(s) for s in series] - - -@pytest.fixture() -def dicom_series_to_exclude() -> list[pydicom.Dataset]: - series = [ - "positioning", - "foo_barpositioning", - "positioningla", - "scout", - 
"localiser", - "localizer", - # Matching should be case insensitive - "lOcALIsER", - ] - return [_make_dicom(s) for s in series] +def _make_dicom( + series_description="mri_sequence", + manufacturer="Company", + series_number="901", +) -> pydicom.Dataset: + return generate_dicom_dataset( + SeriesDescription=series_description, + Manufacturer=manufacturer, + SeriesNumber=series_number, + ) -def _make_dicom(series_description) -> pydicom.Dataset: - return generate_dicom_dataset(SeriesDescription=series_description) +@pytest.mark.parametrize( + ("series_description", "manufacturer", "series_number", "expect_exclude"), + [ + ("", "Company", 1, False), + ("whatever", "Company", 1, False), + ("whatever", "Company", None, True), + ("positioning", "Company", 1, True), + ("foo_barpositioning", "Company", 1, True), + ("positioningla", "Company", 1, True), + ("scout", "Company", 1, True), + ("localiser", "Company", 1, True), + ("localizer", "Company", 1, True), + ("lOcALIsER", "Company", 1, True), + ("", "DifferentCompany", 1, True), + ("", "Company", 123456789, True), + ], +) +def test_should_exclude_series( + series_description, manufacturer, series_number, expect_exclude +): + config = load_project_config(TEST_PROJECT_SLUG) + ds = _make_dicom(series_description, manufacturer, series_number) + assert _should_exclude_series(ds, config) == expect_exclude -def test_should_exclude_series(dicom_series_to_exclude, dicom_series_to_keep): +@pytest.mark.parametrize( + ("manufacturer", "expect_exclude"), + [ + ("Company", False), + ("DifferentCompany", True), + (None, True), + ], +) +def test_should_exclude_manufacturer(manufacturer, expect_exclude): config = load_project_config(TEST_PROJECT_SLUG) - for s in dicom_series_to_keep: - assert not _should_exclude_series(s, config) - for s in dicom_series_to_exclude: - assert _should_exclude_series(s, config) + ds = _make_dicom(manufacturer=manufacturer) + if manufacturer is None: + # the Manufacturer tag is sometimes missing in real data 
+ delattr(ds, "Manufacturer") + assert _should_exclude_manufacturer(ds, config) == expect_exclude def test_can_nifti_convert_post_anonymisation( diff --git a/projects/configs/despiad.yaml b/projects/configs/despiad.yaml new file mode 100644 index 000000000..b1f5be5e7 --- /dev/null +++ b/projects/configs/despiad.yaml @@ -0,0 +1,50 @@ +# Copyright (c) 2024 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project: + name: "despiad" + modalities: + - "CT" + - "PT" + +tag_operation_files: + base: + - "base.yaml" + - "ct.yaml" + - "pet.yaml" + - "despiad.yaml" + manufacturer_overrides: [] + +allowed_manufacturers: + - regex: "^ge medical systems" + exclude_series_numbers: + - 1200 + - 1201 + - 1202 + - 1203 + - 1301 + - 1501 + +min_instances_per_series: 2 + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + - "mip" + +destination: + dicom: "xnat" + parquet: "none" diff --git a/projects/configs/ms-pinpoint.yaml b/projects/configs/ms-pinpoint.yaml index a68258adf..db78dc463 100644 --- a/projects/configs/ms-pinpoint.yaml +++ b/projects/configs/ms-pinpoint.yaml @@ -23,6 +23,12 @@ tag_operation_files: - "ms-pinpoint.yaml" manufacturer_overrides: ["mri.yaml"] +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/prognosis-ai.yaml b/projects/configs/prognosis-ai.yaml index 
f1292856e..8594f119b 100644 --- a/projects/configs/prognosis-ai.yaml +++ b/projects/configs/prognosis-ai.yaml @@ -23,6 +23,12 @@ tag_operation_files: - "ion-neuro-db.yaml" manufacturer_overrides: ["mri.yaml"] +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/tag-operations/ct.yaml b/projects/configs/tag-operations/ct.yaml new file mode 100644 index 000000000..dd750eb1b --- /dev/null +++ b/projects/configs/tag-operations/ct.yaml @@ -0,0 +1,102 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Conversion Type + group: 0x0008 + element: 0x0064 + op: keep +- name: Spacing Between Slices + group: 0x0018 + element: 0x0088 + op: keep +- name: Data Collection Diameter + group: 0x0018 + element: 0x0090 + op: keep +- name: Reconstruction Diameter + group: 0x0018 + element: 0x1100 + op: keep +- name: Distance Source to Detector + group: 0x0018 + element: 0x1110 + op: keep +- name: Distance Source to Patient + group: 0x0018 + element: 0x1111 + op: keep +- name: Gantry Detector Tilt + group: 0x0018 + element: 0x1120 + op: keep +- name: Table Height + group: 0x0018 + element: 0x1130 + op: keep +- name: Rotation Direction + group: 0x0018 + element: 0x1140 + op: keep +- name: Exposure Time + group: 0x0018 + element: 0x1150 + op: keep +- name: X-Ray Tube Current + group: 0x0018 + element: 0x1151 + op: keep +- name: Exposure + group: 0x0018 + element: 0x1152 + op: keep +- name: Filter Type + group: 0x0018 + element: 0x1160 + op: keep +- name: Generator Power + group: 0x0018 + element: 0x1170 + op: keep +- name: Convolution Kernel + group: 0x0018 + element: 0x1210 + op: keep +- name: Revolution Time + group: 0x0018 + element: 0x9305 + op: keep +- name: Single Collimation Width + group: 0x0018 + element: 0x9306 + op: keep +- name: Total Collimation Width + group: 0x0018 + element: 0x9307 + op: keep +- name: Table Speed + group: 0x0018 + element: 0x9309 + op: keep +- name: Table Feed per Rotation + group: 0x0018 + element: 0x9310 + op: keep +- name: Spiral Pitch Factor + group: 0x0018 + element: 0x9311 + op: keep +- name: Slice Location + group: 0x0020 + element: 0x1041 + op: keep diff --git a/projects/configs/tag-operations/despiad.yaml b/projects/configs/tag-operations/despiad.yaml new file mode 100644 index 000000000..35fb2587a --- /dev/null +++ b/projects/configs/tag-operations/despiad.yaml @@ -0,0 +1,42 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- name: Study Date + group: 0x0008 + element: 0x0020 + op: keep +- name: Series Date + group: 0x0008 + element: 0x0021 + op: keep +- name: Acquisition Date + group: 0x0008 + element: 0x0022 + op: keep +- name: Series Time + group: 0x0008 + element: 0x0031 + op: keep +- name: Acquisition Time + group: 0x0008 + element: 0x0032 + op: keep +- name: Station Name + group: 0x0008 + element: 0x1010 + op: keep +- name: Patient's Birth Date + group: 0x0010 + element: 0x0030 + op: keep diff --git a/projects/configs/tag-operations/pet.yaml b/projects/configs/tag-operations/pet.yaml new file mode 100644 index 000000000..17065eec0 --- /dev/null +++ b/projects/configs/tag-operations/pet.yaml @@ -0,0 +1,222 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Series Date + group: 0x0008 + element: 0x0021 + op: replace +- name: Acquisition Date + group: 0x0008 + element: 0x0022 + op: replace +- name: Series Time + group: 0x0008 + element: 0x0031 + op: keep +- name: Acquisition Time + group: 0x0008 + element: 0x0032 + op: keep +- name: Code Value + group: 0x0008 + element: 0x0100 + op: keep +- name: Coding Scheme Designator + group: 0x0008 + element: 0x0102 + op: keep +- name: Code Meaning + group: 0x0008 + element: 0x0104 + op: keep +- name: Mapping Resource + group: 0x0008 + element: 0x0105 + op: keep +- name: Context Group Version + group: 0x0008 + element: 0x0106 + op: keep +- name: Context Identifier + group: 0x0008 + element: 0x010f + op: keep +- name: Patients Size + group: 0x0010 + element: 0x1020 + op: keep +- name: Patients Weight + group: 0x0010 + element: 0x1030 + op: keep +- name: Radiopharmaceutical + group: 0x0018 + element: 0x0031 + op: keep +- name: Slice Thickness + group: 0x0018 + element: 0x0050 + op: keep +- name: Radiopharmaceutical Start Time + group: 0x0018 + element: 0x1072 + op: keep +- name: Radiopharmaceutical Stop Time + group: 0x0018 + element: 0x1073 + op: keep +- name: Radionuclide Total Dose + group: 0x0018 + element: 0x1074 + op: keep +- name: Radionuclide Half Life + group: 0x0018 + element: 0x1075 + op: keep +- name: Radionuclide Positron Fraction + group: 0x0018 + element: 0x1076 + op: keep +- name: Radiopharmaceutical Start DateTime + group: 0x0018 + element: 0x1078 + op: replace +- name: Collimator Type + group: 0x0018 + element: 0x1181 + op: keep +- name: Actual Frame Duration + group: 0x0018 + element: 0x1242 + op: keep +- name: Corrected Image + group: 0x0028 + element: 0x0051 + op: keep +- name: Radiopharmaceutical Information Sequence + group: 0x0054 + element: 0x0016 + op: keep +- name: Number of Slices + group: 0x0054 + element: 0x0081 + op: keep +- name: Number of Time Slices + group: 0x0054 + element: 0x0101 + op: keep +- name: Radionuclide Code Sequence + group: 
0x0054 + element: 0x0300 + op: keep +- name: Radiopharmaceutical Code Sequence + group: 0x0054 + element: 0x0304 + op: keep +- name: Patient Orientation Code Sequence + group: 0x0054 + element: 0x0410 + op: keep +- name: Patient Orientation Modifier Code Sequence + group: 0x0054 + element: 0x0412 + op: keep +- name: Patient Gantry Relationship Code Sequence + group: 0x0054 + element: 0x0414 + op: keep +- name: Series Type + group: 0x0054 + element: 0x1000 + op: keep +- name: Units + group: 0x0054 + element: 0x1001 + op: keep +- name: Counts Source + group: 0x0054 + element: 0x1002 + op: keep +- name: Randoms Correction Method + group: 0x0054 + element: 0x1100 + op: keep +- name: Attenuation Correction Method + group: 0x0054 + element: 0x1101 + op: keep +- name: Decay Correction + group: 0x0054 + element: 0x1102 + op: keep +- name: Reconstruction Method + group: 0x0054 + element: 0x1103 + op: keep +- name: Detector Lines of Response Used + group: 0x0054 + element: 0x1104 + op: keep +- name: Scatter Correction Method + group: 0x0054 + element: 0x1105 + op: keep +- name: Axial Mash + group: 0x0054 + element: 0x1201 + op: keep +- name: Transverse Mash + group: 0x0054 + element: 0x1202 + op: keep +- name: Coincidence Window Width + group: 0x0054 + element: 0x1210 + op: keep +- name: Frame Reference Time + group: 0x0054 + element: 0x1300 + op: keep +- name: Primary Prompts Counts Accumulated + group: 0x0054 + element: 0x1310 + op: keep +- name: Secondary Counts Accumulated + group: 0x0054 + element: 0x1311 + op: keep +- name: Slice Sensitivity Factor + group: 0x0054 + element: 0x1320 + op: keep +- name: Decay Factor + group: 0x0054 + element: 0x1321 + op: keep +- name: Dose Calibration Factor + group: 0x0054 + element: 0x1322 + op: keep +- name: Scatter Fraction Factor + group: 0x0054 + element: 0x1323 + op: keep +- name: Dead Time Factor + group: 0x0054 + element: 0x1324 + op: keep +- name: Image Index + group: 0x0054 + element: 0x1330 + op: keep diff --git 
a/projects/configs/test-external-user.yaml b/projects/configs/test-external-user.yaml index ad4734d07..ba567b819 100644 --- a/projects/configs/test-external-user.yaml +++ b/projects/configs/test-external-user.yaml @@ -23,6 +23,14 @@ tag_operation_files: - "diffusion-weighted-mri.yaml" manufacturer_overrides: ["mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^siemens" + exclude_series_numbers: [] + - regex: "^company" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml index a2463fb51..ef475f85c 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml @@ -24,6 +24,12 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] +min_instances_per_series: 1 + +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml index 2ed67c450..ae1967b61 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml @@ -24,6 +24,12 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index 52199ef1e..d211efff4 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -24,6 +24,17 @@ tag_operation_files: - "xray.yaml" manufacturer_overrides: ["mri.yaml", 
"mri-diffusion.yaml"] +allowed_manufacturers: + - regex: "^carestream" + exclude_series_numbers: [] + - regex: "^company" + exclude_series_numbers: + - 123456789 + - regex: "^philips" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-mr-spectroscopy.yaml b/projects/configs/test-mr-spectroscopy.yaml index 6571deb1e..ced7ab1bf 100644 --- a/projects/configs/test-mr-spectroscopy.yaml +++ b/projects/configs/test-mr-spectroscopy.yaml @@ -24,6 +24,12 @@ tag_operation_files: manufacturer_overrides: - "mri.yaml" +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/test-radiotherapy.yaml b/projects/configs/test-radiotherapy.yaml index 029610699..ba06dc3f1 100644 --- a/projects/configs/test-radiotherapy.yaml +++ b/projects/configs/test-radiotherapy.yaml @@ -28,6 +28,12 @@ tag_operation_files: - "rt-struct.yaml" manufacturer_overrides: null +allowed_manufacturers: + - regex: "^company" + exclude_series_numbers: [] + +min_instances_per_series: 1 + series_filters: - "localizer" - "localiser" diff --git a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml index c54eac399..ca0afff29 100644 --- a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml +++ b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml @@ -22,6 +22,13 @@ tag_operation_files: - "base.yaml" #Expected base config file for any project - "xray.yaml" manufacturer_overrides: null + +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + destination: dicom: "ftps" parquet: "ftps" diff --git a/projects/configs/uclh-prostate-mri-external-dataset.yaml b/projects/configs/uclh-prostate-mri-external-dataset.yaml index 
326e94c92..a68e09134 100644 --- a/projects/configs/uclh-prostate-mri-external-dataset.yaml +++ b/projects/configs/uclh-prostate-mri-external-dataset.yaml @@ -22,6 +22,13 @@ tag_operation_files: - "mri.yaml" - "diffusion-weighted-mri.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] + +allowed_manufacturers: + - regex: ".*" + exclude_series_numbers: [] + +min_instances_per_series: 1 + destination: dicom: "ftps" parquet: "ftps" diff --git a/pytest-pixl/src/pytest_pixl/data/dicom-study/study.zip b/pytest-pixl/src/pytest_pixl/data/dicom-study/study.zip new file mode 100644 index 000000000..33d8bd9ba Binary files /dev/null and b/pytest-pixl/src/pytest_pixl/data/dicom-study/study.zip differ diff --git a/template_config.yaml b/template_config.yaml index 223c11fe9..87ac3e003 100644 --- a/template_config.yaml +++ b/template_config.yaml @@ -20,7 +20,34 @@ project: tag_operation_files: # DICOM tag anonymisation operations base: - "base-tag-operations.yaml" # Base schema - manufacturer_overrides: none # Manufactuer-dependendent overrides + manufacturer_overrides: [] # Manufacturer-dependent overrides + +allowed_manufacturers: + - regex: "^example-manufacturer" + exclude_series_numbers: [] + # For DICOM generated by Canon, Series "8000" always has burned-in data + # so we always exclude this series + - regex: "^canon" + exclude_series_numbers: + - 8000 + - regex: "^ge medical systems" + exclude_series_numbers: + - 1200 + - 1201 + - 1202 + - 1203 + - 1301 + - 1501 + +# Filter out any series with a single instance (e.g. PACS reports, screenshots) +min_instances_per_series: 2 + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + - "mip" # for PET studies these series have PID destination: dicom: "ftps" # alternatives: "dicomweb", "xnat", "none"