From e45760b24c21f554fbef37356d2c40d4117e3f45 Mon Sep 17 00:00:00 2001 From: Theodore Zhao Date: Thu, 5 Sep 2024 21:52:00 +0000 Subject: [PATCH] Initial commit Add files via upload Microsoft mandatory file remove zip file Code from BiomedParse Update README.md adding figure scripts improve readme Add files via upload Add inference code Update readme Add figure plots and readme ignore plots ignor large file ignore result Configure Git LFS and add large files Track large JSON files with LFS adding back results Add demo dataset Cleaned up a conda environment Update config with correct normalization Conda environment setup without MPI fixed typo fix target_dist.json fix target_dist.json Fix conda environment for clip Change utils folder to utilities folder Medical image preprocessing Update README with dataset list Fix formatting issue Update README.md Adds Usage and License Notices Update README.md Fix for compatible with numpy Update README.md --- README.md | 162 ++++++---------- assets/readmes/DATASET.md | 42 ++++ assets/requirements/requirements.txt | 10 +- assets/requirements/requirements_custom.txt | 2 +- datasets/build.py | 2 +- .../dataset_mappers/biomed_dataset_mapper.py | 2 +- .../evaluation/classification_evaluation.py | 4 +- .../evaluation/segmentation_evaluation.py | 2 +- entry.py | 2 +- environment.yml | 6 +- example_prediction.py | 11 +- inference_utils/inference.py | 2 +- inference_utils/processing_utils.py | 182 ++++++++++++++++++ modeling/BaseModel.py | 2 +- modeling/architectures/seem_model_demo.py | 6 +- modeling/architectures/seem_model_v0.py | 4 +- modeling/architectures/seem_model_v1.py | 6 +- modeling/architectures/xdecoder_model.py | 4 +- modeling/language/LangEncoder/transformer.py | 4 +- modeling/language/misc.py | 2 +- modeling/language/vlpencoder.py | 2 +- modeling/utils/misc.py | 2 +- pipeline/XDecoderPipeline.py | 4 +- trainer/utils_trainer.py | 2 +- trainer/xdecoder_trainer.py | 2 +- {utils => utilities}/Config.py | 0 {utils => utilities}/__init__.py | 0 {utils => utilities}/arguments.py | 0 {utils => utilities}/constants.py | 0 {utils => utilities}/dataset.py | 0 {utils => utilities}/distributed.py | 2 +- {utils => utilities}/misc.py | 0 {utils => utilities}/model.py | 2 +- {utils => utilities}/prompt_engineering.py | 0 {utils => utilities}/visualizer.py | 0 35 files changed, 321 insertions(+), 152 deletions(-) create mode 100644 assets/readmes/DATASET.md create mode 100644 inference_utils/processing_utils.py rename {utils => utilities}/Config.py (100%) rename {utils => utilities}/__init__.py (100%) rename {utils => utilities}/arguments.py (100%) rename {utils => utilities}/constants.py (100%) rename {utils => utilities}/dataset.py (100%) rename {utils => utilities}/distributed.py (99%) rename {utils => utilities}/misc.py (100%) rename {utils => utilities}/model.py (97%) rename {utils => utilities}/prompt_engineering.py (100%) rename {utils => utilities}/visualizer.py (100%) diff --git a/README.md b/README.md index e73333a..1192c86 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,22 @@ # **BiomedParse** -:grapes: \[[Read Our arXiv Paper](https://arxiv.org/abs/2405.12971)\]   :apple: \[[Check Our Demo](https://microsoft.github.io/BiomedParse/)\] +This is the official repository for "A foundation model for joint segmentation, detection and recognition of biomedical objects across nine modalities" +\[[Read Our arXiv Paper](https://arxiv.org/abs/2405.12971)\] \[[Check Our Demo](https://microsoft.github.io/BiomedParse/)\] ## Installation +```sh +git clone 
https://github.com/microsoft/BiomedParse.git
+```

### Conda Environment Setup
-Create a new conda environment
+#### Option 1: Directly build the conda environment
+Under the project directory, run
```sh
-conda create -n biomedparse python=3.9
+conda env create -f environment.yml
+```
+
+#### Option 2: Create a new conda environment from scratch
+```sh
+conda create -n biomedparse python=3.9.19
conda activate biomedparse
```
@@ -14,45 +24,23 @@ Install Pytorch
```sh
conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
```
-In case there is issue with detectron2 installation in the following, make sure your pytorch version is compatible with CUDA version on your machine at https://pytorch.org/.
+If there is an issue with the detectron2 installation, make sure your PyTorch version is compatible with the CUDA version on your machine (see https://pytorch.org/).

Install dependencies
```sh
pip install -r assets/requirements/requirements.txt
-pip install -r assets/requirements/requirements_custom.txt
-```
-
-### Install Docker
-
-In order to make sure the environment is set up correctly, we use run BiomedParse on a Docker image. Follow these commands to install Docker on Ubuntu:
-
-```sh
-sudo apt update
-sudo apt install apt-transport-https ca-certificates curl software-properties-common
-curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
-sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
-sudo apt update
-apt-cache policy docker-ce
-sudo apt install docker-ce
```

-## Prepare Docker Environment
-
-Specify the project directories in `docker/README.md`.
-
-Run the following commands to set up the Docker environment:
-
-```sh
-bash docker/docker_build.sh
-bash docker/docker_run.sh
-bash docker/setup_inside_docker.sh
-source docker/data_env.sh
-```
+## Dataset
+BiomedParseData was created by preprocessing publicly available biomedical image segmentation datasets. Check a subset of our processed datasets on HuggingFace: https://huggingface.co/datasets/microsoft/BiomedParseData. For the source datasets, please check the details here: [BiomedParseData](assets/readmes/DATASET.md). As a quick start, we've sampled a tiny demo dataset at biomedparse_datasets/BiomedParseData-Demo.

-## Dataset Description and Preparation
+## Model Checkpoints
+We host our model checkpoints on HuggingFace here: https://huggingface.co/microsoft/BiomedParse. Please expect future updates of the model as we make it more robust and powerful based on feedback from the community. We recommend using the latest version of the model. Please put the model in the `pretrained` folder when running the code.

+## Finetune on Your Own Data
+While BiomedParse can take arbitrary images and text prompts as input, it can only reasonably segment targets that it has learned during pretraining! If you have a specific segmentation task that the latest checkpoint doesn't handle well, here are the instructions for finetuning it on your own data.

### Raw Image and Annotation
-For each dataset, put the raw image and mask files in the following format
+BiomedParse expects images and ground-truth masks in 1024x1024 PNG format. For each dataset, put the raw image and mask files in the following format
```
├── biomedparse_datasets
    ├── YOUR_DATASET_NAME
       ├── train
       ├── train_mask
       ├── test
       └── test_mask
```
-
Each folder should contain .png files.
The mask files should be binary images where nonzero pixels indicate the foreground region.

### File Name Convention
@@ -81,8 +68,12 @@ One image can be associated with multiple masks corresponding to multiple target
[TARGET] is the name of the target with spaces replaced by '+'. E.g. "tube" or "chest+tube". Make sure "_" doesn't appear in [TARGET].

### Get Final Data File with Text Prompts
-In biomedparse_datasets/create-customer-datasets.py, specify YOUR_DATASET_NAME.
-Once the create-custom-coco-dataset script is run, the dataset folder should be of the following format
+In biomedparse_datasets/create-customer-datasets.py, specify YOUR_DATASET_NAME. Run the script with
+```
+cd biomedparse_datasets
+python create-customer-datasets.py
+```
+After that, the dataset folder should be of the following format
```
├── dataset_name
   ├── train
   ├── train_mask
   ├── train.json
   ├── test
   ├── test_mask
   └── test.json
```
@@ -97,28 +88,24 @@
In datasets/registration/register_biomed_datasets.py, simply add YOUR_DATASET_NAME to the datasets list. Registered datasets are ready to be added to the training and evaluation config file configs/biomed_seg_lang_v1.yaml. Your training dataset is registered as biomed_YOUR_DATASET_NAME_train, and your test dataset is biomed_YOUR_DATASET_NAME_test.

-## Training
-
-To train the model using the example BiomedParseData-Demo, run:
+## Train BiomedParse
+To train the BiomedParse model, run:
```sh
bash assets/scripts/train.sh
```
+This will continue training the model on the training datasets specified in configs/biomed_seg_lang_v1.yaml.

-### Customizing Training Settings
-See Training Parameters section for example.
-
-## Evaluation
-
-To evaluate the model on the example BiomedParseData-Demo, run:
-
+## Evaluate BiomedParse
+To evaluate the model, run:
```sh
bash assets/scripts/eval.sh
```
+This will evaluate the model on the test datasets specified in configs/biomed_seg_lang_v1.yaml. We put BiomedParseData-Demo as the default; you can add any other datasets to the list.

-## Inference
+## Run Inference
Example inference code is provided in `example_prediction.py`. We provide example images in `examples`, a model checkpoint in `pretrained`, and the model configuration in `configs/biomedparse_inference.yaml`.
-
+
### Model Setup
```python
from PIL import Image
@@ -170,69 +157,32 @@ Detection and recognition inference code are provided in `inference_utils/output

-## Reproducing Results
-To reproduce the exact results presented in the paper, use the following table of parameters and configurations:
+
+## Usage and License Notices
+The model described in this repository is provided for research and development use only. The model is not intended for use in clinical decision-making or for any other clinical use, and the performance of the model for clinical use has not been established. You bear sole responsibility for any use of this model, including incorporation into any product intended for clinical use.
diff --git a/assets/readmes/DATASET.md b/assets/readmes/DATASET.md
new file mode 100644
index 0000000..17e7158
--- /dev/null
+++ b/assets/readmes/DATASET.md
@@ -0,0 +1,41 @@
+# **BiomedParseData**
+
+BiomedParseData was created by preprocessing publicly available biomedical image segmentation datasets.
+
+These datasets are provided pre-formatted for convenience.
For additional information about the datasets or their licenses, please reach out to the owners:
+| Dataset | URL |
+|---------------------------------------|-----|
+| amos22 | [https://amos22.grand-challenge.org/](https://amos22.grand-challenge.org/) |
+| MSD (Medical Segmentation Decathlon) | [http://medicaldecathlon.com/](http://medicaldecathlon.com/) |
+| KiTS23 | [https://github.com/neheller/kits23](https://github.com/neheller/kits23) |
+| BTCV | [https://www.synapse.org/#!Synapse:syn3193805/wiki/217790](https://www.synapse.org/#!Synapse:syn3193805/wiki/217790) |
+| COVID-19 CT | [https://www.kaggle.com/datasets/andrewmvd/covid19-ct-scans](https://www.kaggle.com/datasets/andrewmvd/covid19-ct-scans) |
+| LIDC-IDRI | [https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI](https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI) |
+| ACDC | [https://www.creatis.insa-lyon.fr/Challenge/acdc/databases.html](https://www.creatis.insa-lyon.fr/Challenge/acdc/databases.html) |
+| M&Ms | [https://www.ub.edu/mnms/](https://www.ub.edu/mnms/) |
+| PROMISE12 | [https://doi.org/10.1016/j.media.2013.12.002](https://doi.org/10.1016/j.media.2013.12.002) |
+| LGG | [https://www.kaggle.com/datasets/mateuszbuda/lgg-mri-segmentation](https://www.kaggle.com/datasets/mateuszbuda/lgg-mri-segmentation) |
+| COVID-QU-Ex | [https://www.kaggle.com/datasets/anasmohammedtahir/covidqu](https://www.kaggle.com/datasets/anasmohammedtahir/covidqu) |
+| QaTa-COV19 | [https://www.kaggle.com/datasets/aysendegerli/qatacov19-dataset](https://www.kaggle.com/datasets/aysendegerli/qatacov19-dataset) |
+| SIIM-ACR Pneumothorax Segmentation | [https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks](https://www.kaggle.com/datasets/vbookshelf/pneumothorax-chest-xray-images-and-masks) |
+| Chest Xray Masks and Labels Dataset | [https://datasetninja.com/chest-xray](https://datasetninja.com/chest-xray) |
+| COVID-19 Radiography Database | [https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database](https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database) |
+| CAMUS | [https://www.creatis.insa-lyon.fr/Challenge/camus/index.html](https://www.creatis.insa-lyon.fr/Challenge/camus/index.html) |
+| BUSI | [https://scholar.cu.edu.eg/?q=afahmy/pages/dataset](https://scholar.cu.edu.eg/?q=afahmy/pages/dataset) |
+| FH-PS-AOP | [https://zenodo.org/records/7851339#.ZEH6eHZBztU](https://zenodo.org/records/7851339#.ZEH6eHZBztU) |
+| CDD-CESM | [https://www.cancerimagingarchive.net/collection/cdd-cesm/](https://www.cancerimagingarchive.net/collection/cdd-cesm/) |
+| PolypGen | [https://www.synapse.org/#!Synapse:syn26376615/wiki/613312](https://www.synapse.org/#!Synapse:syn26376615/wiki/613312) |
+| NeoPolyp | [https://www.kaggle.com/c/bkai-igh-neopolyp/data](https://www.kaggle.com/c/bkai-igh-neopolyp/data) |
+| ISIC 2018 | [https://challenge2018.isic-archive.com/task1/](https://challenge2018.isic-archive.com/task1/) |
+| UwaterlooSkinCancer | [Skin Cancer Detection \| Vision and Image Processing Lab \| University of Waterloo](https://uwaterloo.ca) |
+| OCT-CME | [https://www.kaggle.com/datasets/zeeshanahmed13/intraretinal-cystoid-fluid](https://www.kaggle.com/datasets/zeeshanahmed13/intraretinal-cystoid-fluid) |
+| REFUGE | [https://bitbucket.org/woalsdnd/refuge/src](https://bitbucket.org/woalsdnd/refuge/src) |
+| G1020 | [https://www.dfki.uni-kl.de/g1020](https://www.dfki.uni-kl.de/g1020) |
+| DRIVE | [https://drive.grand-challenge.org/](https://drive.grand-challenge.org/) |
+| GlaS | [https://warwick.ac.uk/fac/cross_fac/tia/data/glascontest/](https://warwick.ac.uk/fac/cross_fac/tia/data/glascontest/) |
+| PanNuke | [https://jgamper.github.io/PanNukeDataset/](https://jgamper.github.io/PanNukeDataset/) |
+| FUMPE | [https://figshare.com/collections/FUMPE/4107803/1](https://figshare.com/collections/FUMPE/4107803/1) |
+| TotalSegmentator | [https://github.com/wasserth/TotalSegmentator](https://github.com/wasserth/TotalSegmentator) |
+| BraTS2023 | [https://www.synapse.org/#!Synapse:syn51156910/wiki/621282](https://www.synapse.org/#!Synapse:syn51156910/wiki/621282) |
+| AbdomenCT-1K | [https://github.com/JunMa11/AbdomenCT-1K](https://github.com/JunMa11/AbdomenCT-1K) |
+| US Simulation & Segmentation | [https://www.kaggle.com/datasets/ignaciorlando/ussimandsegm](https://www.kaggle.com/datasets/ignaciorlando/ussimandsegm) |
diff --git a/assets/requirements/requirements.txt b/assets/requirements/requirements.txt
index 10620c9..d62bda8 100755
--- a/assets/requirements/requirements.txt
+++ b/assets/requirements/requirements.txt
@@ -7,14 +7,14 @@ scikit-learn==1.3.1
pandas==2.0.3
timm==0.4.12
numpy==1.26.4
-einops==0.7.0
+einops==0.8.0
fvcore==0.1.5.post20221221
transformers==4.34.0
sentencepiece==0.1.99
ftfy==6.1.1
regex==2023.10.3
nltk==3.8.1
-#mpi4py==3.1.5
+mpi4py==3.1.5
vision-datasets==0.2.2
cython==3.0.2
pycocotools==2.0.7
@@ -29,11 +29,9 @@ kornia==0.7.0
deepspeed==0.10.3
#wandb==0.15.12
infinibatch==0.1.1
+open-clip-torch==2.26.1
+git+https://github.com/MaureenZOU/detectron2-xyz.git
#gradio==3.42.0
-# --extra-index-url https://download.pytorch.org/whl/cu124
-# torch
-# torchvision
-# torchaudio
#torch==2.3.1 #2.0.1
#torchvision==0.15.2
#torchaudio==2.0.2
diff --git a/assets/requirements/requirements_custom.txt b/assets/requirements/requirements_custom.txt
index 18a45b7..cf4208c 100755
--- a/assets/requirements/requirements_custom.txt
+++ b/assets/requirements/requirements_custom.txt
@@ -1,6 +1,6 @@
git+https://github.com/cocodataset/panopticapi.git
git+https://github.com/openai/CLIP.git
-git+https://github.com/arogozhnikov/einops.git
+#git+https://github.com/arogozhnikov/einops.git
#git+https://github.com/facebookresearch/detectron2.git
git+https://github.com/MaureenZOU/detectron2-xyz.git
#git+https://github.com/openai/whisper.git
\ No newline at end of file
diff --git a/datasets/build.py b/datasets/build.py
index 93e4834..f95da8f 100755
--- a/datasets/build.py
+++ b/datasets/build.py
@@ -48,7 +48,7 @@
    InteractiveEvaluator,
)
from modeling.utils import configurable
-from utils.distributed import get_world_size
+from utilities.distributed import get_world_size

class JointLoader(torchdata.IterableDataset):
    """
diff --git a/datasets/dataset_mappers/biomed_dataset_mapper.py b/datasets/dataset_mappers/biomed_dataset_mapper.py
index e833da5..36b4d44 100755
--- a/datasets/dataset_mappers/biomed_dataset_mapper.py
+++ b/datasets/dataset_mappers/biomed_dataset_mapper.py
@@ -18,7 +18,7 @@
from detectron2.data import MetadataCatalog
from pycocotools import mask as coco_mask

-from utils import prompt_engineering
+from utilities import prompt_engineering
from modeling.language import build_tokenizer
from modeling.language.misc import text_noun_with_prompt_all
from modeling.utils import configurable
diff --git a/datasets/evaluation/classification_evaluation.py
b/datasets/evaluation/classification_evaluation.py index bf5386b..db9e2d9 100755 --- a/datasets/evaluation/classification_evaluation.py +++ b/datasets/evaluation/classification_evaluation.py @@ -11,8 +11,8 @@ from detectron2.evaluation.evaluator import DatasetEvaluator -from utils.misc import AverageMeter -from utils.distributed import get_world_size +from utilities.misc import AverageMeter +from utilities.distributed import get_world_size @torch.no_grad() diff --git a/datasets/evaluation/segmentation_evaluation.py b/datasets/evaluation/segmentation_evaluation.py index 38d6781..98a14ce 100755 --- a/datasets/evaluation/segmentation_evaluation.py +++ b/datasets/evaluation/segmentation_evaluation.py @@ -13,7 +13,7 @@ from detectron2.utils.comm import all_gather, is_main_process from detectron2.utils.file_io import PathManager from detectron2.evaluation.evaluator import DatasetEvaluator -from utils.distributed import synchronize +from utilities.distributed import synchronize from ..semseg_loader import load_semseg diff --git a/entry.py b/entry.py index 5475134..ce80ee7 100755 --- a/entry.py +++ b/entry.py @@ -13,7 +13,7 @@ import random import numpy as np -from utils.arguments import load_opt_command +from utilities.arguments import load_opt_command logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) diff --git a/environment.yml b/environment.yml index 64ffe9f..d56e485 100644 --- a/environment.yml +++ b/environment.yml @@ -103,11 +103,11 @@ dependencies: - antlr4-python3-runtime==4.9.3 - appdirs==1.4.4 - black==21.4b2 - - clip==1.0 + - open-clip-torch==2.26.1 - cloudpickle==3.0.0 - cython==3.0.2 - deepspeed==0.10.3 - - detectron2==0.6 + - git+https://github.com/MaureenZOU/detectron2-xyz.git - diffdist==0.1 - einops==0.8.0 - ftfy==6.1.1 @@ -129,12 +129,10 @@ dependencies: - omegaconf==2.3.0 - opencv-python==4.8.1.78 - pandas==2.0.3 - - panopticapi==0.1 - pathspec==0.12.1 - pillow==9.4.0 - portalocker==2.10.1 - py-cpuinfo==9.0.0 - - pyarrow==13.0.0 - pycocotools==2.0.7 - pydantic==1.10.18 - pydot==3.0.1 diff --git a/example_prediction.py b/example_prediction.py index 33cba63..aa8a28c 100644 --- a/example_prediction.py +++ b/example_prediction.py @@ -3,9 +3,9 @@ import argparse from modeling.BaseModel import BaseModel from modeling import build_model -from utils.distributed import init_distributed -from utils.arguments import load_opt_from_config_files -from utils.constants import BIOMED_CLASSES +from utilities.distributed import init_distributed +from utilities.arguments import load_opt_from_config_files +from utilities.constants import BIOMED_CLASSES from inference_utils.inference import interactive_infer_image @@ -19,12 +19,11 @@ def parse_option(): cfg = parse_option() opt = load_opt_from_config_files([cfg.conf_files]) -#opt = init_distributed(opt) -opt['device'] = torch.device('cuda') +opt = init_distributed(opt) # Load model from pretrained weights -pretrained_pth = 'pretrained/biomedparse_v2.pt' +pretrained_pth = 'pretrained/biomed_parse.pt' model = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth).eval().cuda() with torch.no_grad(): diff --git a/inference_utils/inference.py b/inference_utils/inference.py index 13b4c45..2145971 100644 --- a/inference_utils/inference.py +++ b/inference_utils/inference.py @@ -8,7 +8,7 @@ # from detectron2.data import MetadataCatalog # from detectron2.structures import BitMasks from modeling.language.loss import vl_similarity -from utils.constants import BIOMED_CLASSES +from utilities.constants import BIOMED_CLASSES 
#from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
# import cv2
diff --git a/inference_utils/processing_utils.py b/inference_utils/processing_utils.py
new file mode 100644
index 0000000..d47ef98
--- /dev/null
+++ b/inference_utils/processing_utils.py
@@ -0,0 +1,189 @@
+import numpy as np
+from skimage import transform
+import pydicom
+from io import BytesIO
+from PIL import Image
+import nibabel as nib
+import SimpleITK as sitk
+from skimage import measure
+
+
+"""
+ This script contains utility functions for reading and processing different imaging modalities.
+"""
+
+
+CT_WINDOWS = {'abdomen': [-150, 250],
+              'lung': [-1000, 1000],
+              'pelvis': [-55, 200],
+              'liver': [-25, 230],
+              'colon': [-68, 187],
+              'pancreas': [-100, 200]}
+
+def process_intensity_image(image_data, is_CT, site=None):
+    # process an intensity-based image; if CT, apply site-specific windowing
+
+    # image_data: 2D numpy array of shape (H, W)
+
+    # return: 3-channel numpy array of shape (H, W, 3) as model input
+
+    if is_CT:
+        # process image with windowing
+        if site and site in CT_WINDOWS:
+            window = CT_WINDOWS[site]
+        else:
+            raise ValueError(f'Please choose CT site from {CT_WINDOWS.keys()}')
+        lower_bound, upper_bound = window
+    else:
+        # process image with intensity range 0.5-99.5 percentile
+        lower_bound, upper_bound = np.percentile(
+            image_data[image_data > 0], 0.5
+        ), np.percentile(image_data[image_data > 0], 99.5)
+
+    image_data_pre = np.clip(image_data, lower_bound, upper_bound)
+    image_data_pre = (
+        (image_data_pre - image_data_pre.min())
+        / (image_data_pre.max() - image_data_pre.min())
+        * 255.0
+    )
+
+    # pad to square with equal padding on both sides
+    shape = image_data_pre.shape
+    if shape[0] > shape[1]:
+        pad = (shape[0]-shape[1])//2
+        pad_width = ((0,0), (pad, pad))
+    elif shape[0] < shape[1]:
+        pad = (shape[1]-shape[0])//2
+        pad_width = ((pad, pad), (0,0))
+    else:
+        pad_width = None
+
+    if pad_width is not None:
+        image_data_pre = np.pad(image_data_pre, pad_width, 'constant', constant_values=0)
+
+    # resize image to 1024x1024
+    image_size = 1024
+    resize_image = transform.resize(image_data_pre, (image_size, image_size), order=3,
+                                    mode='constant', preserve_range=True, anti_aliasing=True)
+
+    # convert to 3-channel image
+    resize_image = np.stack([resize_image]*3, axis=-1)
+
+    return resize_image.astype(np.uint8)
+
+
+
+def read_dicom(image_path, is_CT, site=None):
+    # read dicom file and return processed pixel data
+
+    # image_path: str, path to dicom file
+    # is_CT: bool, whether image is CT or not
+    # site: str, one of CT_WINDOWS.keys()
+    # return: 3-channel numpy array of shape (H, W, 3) as model input
+
+    ds = pydicom.dcmread(image_path)
+    image_array = ds.pixel_array * ds.RescaleSlope + ds.RescaleIntercept
+
+    image_array = process_intensity_image(image_array, is_CT, site)
+
+    return image_array
+
+
+def read_nifti(image_path, is_CT, slice_idx, site=None, HW_index=(0, 1), channel_idx=None):
+    # read nifti file and return processed pixel data
+
+    # image_path: str, path to nifti file
+    # is_CT: bool, whether image is CT or not
+    # slice_idx: int, slice index to read
+    # site: str, one of CT_WINDOWS.keys()
+    # HW_index: tuple, index of height and width in the image shape
+    # channel_idx: int, optional channel index for 4D images
+    # return: 3-channel numpy array of shape (H, W, 3) as model input
+
+    nii = nib.load(image_path)
+    image_array = nii.get_fdata()
+
+    if HW_index != (0, 1):
+        image_array = np.moveaxis(image_array, HW_index, (0, 1))
+
+    # get slice
+    if channel_idx is None:
+        image_array = image_array[:, :, slice_idx]
+    else:
+        image_array = image_array[:, :, slice_idx, channel_idx]
+
+    image_array = process_intensity_image(image_array, is_CT, site)
+    return image_array
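+
+# Example usage (a minimal sketch; the file paths, site, and slice index below
+# are hypothetical and only illustrate the expected arguments):
+#
+#   ct_slice = read_dicom('example_ct_slice.dcm', is_CT=True, site='abdomen')
+#   mr_slice = read_nifti('example_mri.nii.gz', is_CT=False, slice_idx=60)
+#   # each returns a (1024, 1024, 3) uint8 array ready for the model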
+
+
+def read_rgb(image_path):
+    # read RGB image and return resized pixel data
+
+    # image_path: str, path to RGB image
+    # return: 3-channel numpy array of shape (1024, 1024, 3)
+
+    # read image into numpy array
+    image = Image.open(image_path)
+    image = np.array(image)
+    if len(image.shape) == 2:
+        image = np.stack([image]*3, axis=-1)
+    elif image.shape[2] == 4:
+        image = image[:,:,:3]
+
+    # pad to square with equal padding on both sides
+    shape = image.shape
+    if shape[0] > shape[1]:
+        pad = (shape[0]-shape[1])//2
+        pad_width = ((0,0), (pad, pad), (0,0))
+    elif shape[0] < shape[1]:
+        pad = (shape[1]-shape[0])//2
+        pad_width = ((pad, pad), (0,0), (0,0))
+    else:
+        pad_width = None
+
+    if pad_width is not None:
+        image = np.pad(image, pad_width, 'constant', constant_values=0)
+
+    # resize image to 1024x1024 for each channel
+    image_size = 1024
+    resize_image = np.zeros((image_size, image_size, 3), dtype=np.uint8)
+    for i in range(3):
+        resize_image[:,:,i] = transform.resize(image[:,:,i], (image_size, image_size), order=3,
+                                               mode='constant', preserve_range=True, anti_aliasing=True)
+
+    return resize_image
+
+
+
+def get_instances(mask):
+    # get instances from a binary mask via hole filling and watershed
+    seg = sitk.GetImageFromArray(mask)
+    filled = sitk.BinaryFillhole(seg)
+    d = sitk.SignedMaurerDistanceMap(filled, insideIsPositive=False, squaredDistance=False, useImageSpacing=False)
+
+    ws = sitk.MorphologicalWatershed(d, markWatershedLine=False, level=1)
+    ws = sitk.Mask(ws, sitk.Cast(seg, ws.GetPixelID()))
+    ins_mask = sitk.GetArrayFromImage(ws)
+
+    # filter out instances with small area outliers
+    props = measure.regionprops_table(ins_mask, properties=('label', 'area'))
+    mean_area = np.mean(props['area'])
+    std_area = np.std(props['area'])
+
+    threshold = mean_area - 2*std_area - 1
+    ins_mask_filtered = ins_mask.copy()
+    for i, area in zip(props['label'], props['area']):
+        if area < threshold:
+            ins_mask_filtered[ins_mask == i] = 0
+
+    return ins_mask_filtered
+
+    
\ No newline at end of file
diff --git a/modeling/BaseModel.py b/modeling/BaseModel.py
index a88631e..010e259 100755
--- a/modeling/BaseModel.py
+++ b/modeling/BaseModel.py
@@ -4,7 +4,7 @@
import torch
import torch.nn as nn

-from utils.model import align_and_update_state_dicts
+from utilities.model import align_and_update_state_dicts

logger = logging.getLogger(__name__)

diff --git a/modeling/architectures/seem_model_demo.py b/modeling/architectures/seem_model_demo.py
index 6e06b17..f1786d5 100755
--- a/modeling/architectures/seem_model_demo.py
+++ b/modeling/architectures/seem_model_demo.py
@@ -25,8 +25,8 @@
from ..modules import sem_seg_postprocess, SetCriterion, HungarianMatcher, bbox_postprocess
from ..language import build_language_encoder
from ..language.loss import vl_similarity
-from utils.prompt_engineering import prompt_engineering
-from utils.constants import COCO_PANOPTIC_CLASSES
+from utilities.prompt_engineering import prompt_engineering
+from utilities.constants import COCO_PANOPTIC_CLASSES


class GeneralizedSEEM(nn.Module):
@@ -734,7 +734,7 @@ def prepare_targets(self, batched_inputs, images):
                tokens = gtext['tokens']

                unique_hash_id = np.unique(grd_hash, return_index=True)[1]
-                selected_mask = np.zeros(len(grd_hash)).astype(np.bool)
+                selected_mask = np.zeros(len(grd_hash)).astype(bool)
                selected_mask[unique_hash_id] = True

                selected_token_emb = token_emb[selected_mask]
diff --git a/modeling/architectures/seem_model_v0.py b/modeling/architectures/seem_model_v0.py
index 3f71a9d..a10f475 100755
---
a/modeling/architectures/seem_model_v0.py +++ b/modeling/architectures/seem_model_v0.py @@ -25,8 +25,8 @@ from ..modules import sem_seg_postprocess, SetCriterion, HungarianMatcher, bbox_postprocess from ..language import build_language_encoder from ..language.loss import vl_similarity -from utils.prompt_engineering import prompt_engineering -from utils.constants import COCO_PANOPTIC_CLASSES +from utilities.prompt_engineering import prompt_engineering +from utilities.constants import COCO_PANOPTIC_CLASSES class GeneralizedSEEM(nn.Module): diff --git a/modeling/architectures/seem_model_v1.py b/modeling/architectures/seem_model_v1.py index 1521971..6d762c3 100755 --- a/modeling/architectures/seem_model_v1.py +++ b/modeling/architectures/seem_model_v1.py @@ -25,8 +25,8 @@ from ..modules import sem_seg_postprocess, SetCriterion, HungarianMatcher, bbox_postprocess from ..language import build_language_encoder from ..language.loss import vl_similarity -from utils.prompt_engineering import prompt_engineering -from utils.constants import COCO_PANOPTIC_CLASSES, BIOMED_CLASSES +from utilities.prompt_engineering import prompt_engineering +from utilities.constants import COCO_PANOPTIC_CLASSES, BIOMED_CLASSES class GeneralizedSEEM(nn.Module): @@ -957,7 +957,7 @@ def prepare_targets(self, batched_inputs, images): tokens = gtext['tokens'] unique_hash_id = np.unique(grd_hash, return_index=True)[1] - selected_mask = np.zeros(len(grd_hash)).astype(np.bool) + selected_mask = np.zeros(len(grd_hash)).astype(bool) selected_mask[unique_hash_id] = True selected_token_emb = token_emb[selected_mask] diff --git a/modeling/architectures/xdecoder_model.py b/modeling/architectures/xdecoder_model.py index d38b858..32cd76a 100755 --- a/modeling/architectures/xdecoder_model.py +++ b/modeling/architectures/xdecoder_model.py @@ -26,8 +26,8 @@ from ..modules import sem_seg_postprocess, SetCriterion, HungarianMatcher, bbox_postprocess from ..language import build_language_encoder from ..language.loss import vl_similarity, image_text_contrastive_loss_queue -from utils.prompt_engineering import prompt_engineering -from utils.constants import COCO_PANOPTIC_CLASSES +from utilities.prompt_engineering import prompt_engineering +from utilities.constants import COCO_PANOPTIC_CLASSES st = LancasterStemmer() diff --git a/modeling/language/LangEncoder/transformer.py b/modeling/language/LangEncoder/transformer.py index f5f599f..f921721 100755 --- a/modeling/language/LangEncoder/transformer.py +++ b/modeling/language/LangEncoder/transformer.py @@ -11,8 +11,8 @@ from timm.models.layers import DropPath, trunc_normal_ from .build import register_lang_encoder -from utils.distributed import is_main_process -from utils.model import register_norm_module +from utilities.distributed import is_main_process +from utilities.model import register_norm_module logger = logging.getLogger(__name__) diff --git a/modeling/language/misc.py b/modeling/language/misc.py index c64aaa8..2f1aa8c 100755 --- a/modeling/language/misc.py +++ b/modeling/language/misc.py @@ -4,7 +4,7 @@ import nltk import numpy as np -from utils.constants import IMAGENET_DEFAULT_TEMPLATES +from utilities.constants import IMAGENET_DEFAULT_TEMPLATES nltk.download('punkt', quiet=True) nltk.download('averaged_perceptron_tagger', quiet=True) diff --git a/modeling/language/vlpencoder.py b/modeling/language/vlpencoder.py index 32f29b0..bbf1932 100755 --- a/modeling/language/vlpencoder.py +++ b/modeling/language/vlpencoder.py @@ -14,7 +14,7 @@ from .build import register_model from ..utils 
import configurable from .LangEncoder import build_tokenizer, build_lang_encoder -from utils.prompt_engineering import prompt_engineering, get_prompt_templates +from utilities.prompt_engineering import prompt_engineering, get_prompt_templates from transformers import AutoTokenizer, AutoModel diff --git a/modeling/utils/misc.py b/modeling/utils/misc.py index 02da5e0..9f3fd93 100755 --- a/modeling/utils/misc.py +++ b/modeling/utils/misc.py @@ -23,7 +23,7 @@ from detectron2.layers import cat, shapes_to_tensor -from utils.constants import * +from utilities.constants import * def pad_arbitrary_tensors(tensors, padding_value=0.): diff --git a/pipeline/XDecoderPipeline.py b/pipeline/XDecoderPipeline.py index c04aba4..5ea586d 100755 --- a/pipeline/XDecoderPipeline.py +++ b/pipeline/XDecoderPipeline.py @@ -27,8 +27,8 @@ from modeling.utils import get_class_names from modeling.BaseModel import BaseModel from datasets import build_evaluator, build_eval_dataloader, build_train_dataloader -from utils.distributed import is_main_process -from utils.constants import COCO_PANOPTIC_CLASSES +from utilities.distributed import is_main_process +from utilities.constants import COCO_PANOPTIC_CLASSES from trainer.utils.misc import move_batch_to_device, cast_batch_to_half from .utils.misc import hook_metadata, hook_switcher, hook_opt diff --git a/trainer/utils_trainer.py b/trainer/utils_trainer.py index c49c645..b1f7863 100755 --- a/trainer/utils_trainer.py +++ b/trainer/utils_trainer.py @@ -30,7 +30,7 @@ from .distributed_trainer import DistributedTrainer from .utils.misc import * from .utils.serialization import JSONEncoder, filter_jsonable -from utils.distributed import get_world_size +from utilities.distributed import get_world_size logger = logging.getLogger(__name__) diff --git a/trainer/xdecoder_trainer.py b/trainer/xdecoder_trainer.py index f26fdfa..18283c4 100755 --- a/trainer/xdecoder_trainer.py +++ b/trainer/xdecoder_trainer.py @@ -23,7 +23,7 @@ from fvcore.common.config import CfgNode from infinibatch import iterators -from utils.distributed import is_main_process, get_world_size +from utilities.distributed import is_main_process, get_world_size from .default_trainer import DefaultTrainer from .utils.serialization import JSONEncoder, filter_jsonable diff --git a/utils/Config.py b/utilities/Config.py similarity index 100% rename from utils/Config.py rename to utilities/Config.py diff --git a/utils/__init__.py b/utilities/__init__.py similarity index 100% rename from utils/__init__.py rename to utilities/__init__.py diff --git a/utils/arguments.py b/utilities/arguments.py similarity index 100% rename from utils/arguments.py rename to utilities/arguments.py diff --git a/utils/constants.py b/utilities/constants.py similarity index 100% rename from utils/constants.py rename to utilities/constants.py diff --git a/utils/dataset.py b/utilities/dataset.py similarity index 100% rename from utils/dataset.py rename to utilities/dataset.py diff --git a/utils/distributed.py b/utilities/distributed.py similarity index 99% rename from utils/distributed.py rename to utilities/distributed.py index 906b386..b002591 100755 --- a/utils/distributed.py +++ b/utilities/distributed.py @@ -4,7 +4,7 @@ import pickle import subprocess -#from mpi4py import MPI +from mpi4py import MPI import torch.distributed as dist diff --git a/utils/misc.py b/utilities/misc.py similarity index 100% rename from utils/misc.py rename to utilities/misc.py diff --git a/utils/model.py b/utilities/model.py similarity index 97% rename from utils/model.py 
rename to utilities/model.py index 50fa674..f87e38b 100755 --- a/utils/model.py +++ b/utilities/model.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn -from utils.distributed import is_main_process +from utilities.distributed import is_main_process logger = logging.getLogger(__name__) diff --git a/utils/prompt_engineering.py b/utilities/prompt_engineering.py similarity index 100% rename from utils/prompt_engineering.py rename to utilities/prompt_engineering.py diff --git a/utils/visualizer.py b/utilities/visualizer.py similarity index 100% rename from utils/visualizer.py rename to utilities/visualizer.py
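
For reference, the inference entry point after this patch can be exercised end to end with a minimal sketch assembled from the `example_prediction.py` changes above. It assumes the checkpoint at `pretrained/biomed_parse.pt` and the config at `configs/biomedparse_inference.yaml`, as used in the diff; the example image path and text prompt are hypothetical placeholders, and `interactive_infer_image` is assumed to return one predicted mask per prompt:

```python
# Minimal inference sketch mirroring the post-patch example_prediction.py flow.
# The image path and text prompt below are hypothetical placeholders.
import torch
from PIL import Image

from modeling.BaseModel import BaseModel
from modeling import build_model
from utilities.distributed import init_distributed
from utilities.arguments import load_opt_from_config_files
from inference_utils.inference import interactive_infer_image

# Build options from the inference config and initialize the runtime
opt = load_opt_from_config_files(['configs/biomedparse_inference.yaml'])
opt = init_distributed(opt)

# Load the pretrained BiomedParse checkpoint onto the GPU
model = BaseModel(opt, build_model(opt)).from_pretrained('pretrained/biomed_parse.pt').eval().cuda()

with torch.no_grad():
    image = Image.open('examples/ct_slice.png').convert('RGB')   # hypothetical example image
    prompts = ['liver']                                          # hypothetical text prompt
    pred_masks = interactive_infer_image(model, image, prompts)  # assumed: one mask per prompt
```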