# ---- file: Predict-Lung-Disease-master/000_preprocess.py ----
# Set-up section of the preprocessing script: imports, sys.path tweak and the
# directory layout under <cwd>/azure-share that the rest of the script uses.
import sys, os
import azure_chestxray_utils
import pickle
import random
import re
import tqdm
import cv2
import numpy as np
import pandas as pd
import sklearn.model_selection
from collections import Counter

paths_to_append = [os.path.join(os.getcwd(), 'Code', 'src')]


def add_path_to_sys_path(path_to_append):
    """Append *path_to_append* to ``sys.path`` unless it is already covered.

    "Covered" is a substring test: the path is skipped when it occurs inside
    any existing ``sys.path`` entry, not only on an exact match.
    """
    if not any(path_to_append in existing for existing in sys.path):
        sys.path.append(path_to_append)


# NOTE(review): azure_chestxray_utils is imported above, i.e. before sys.path
# is extended here -- confirm the module is importable from the working dir.
for crt_path in paths_to_append:
    add_path_to_sys_path(crt_path)


def _ensure_dir(dir_path):
    """Create *dir_path* (including missing parents) if needed and return it.

    os.mkdir()/os.makedirs() return None, so the path itself is handed back to
    keep the caller's variable pointing at the directory.
    """
    os.makedirs(dir_path, exist_ok=True)
    return dir_path


# os.path.join keeps the location portable; on Windows it is the same
# directory as the former os.getcwd() + r'\azure-share'.
amlWBSharedDir = _ensure_dir(os.path.join(os.getcwd(), 'azure-share'))

prj_consts = azure_chestxray_utils.chestxray_consts()
print(prj_consts)

data_base_input_dir = _ensure_dir(
    os.path.join(amlWBSharedDir, *prj_consts.BASE_INPUT_DIR_list))
print(data_base_input_dir)

data_base_output_dir = _ensure_dir(
    os.path.join(amlWBSharedDir, *prj_consts.BASE_OUTPUT_DIR_list))
print(data_base_output_dir)

nih_chest_xray_data_dir = _ensure_dir(
    os.path.join(data_base_input_dir, *prj_consts.ChestXray_IMAGES_DIR_list))
print(nih_chest_xray_data_dir)

# The CSV inputs are read from here; the directory is expected to be filled by the user.
other_data_dir = os.path.join(data_base_input_dir,
                              *prj_consts.ChestXray_OTHER_DATA_DIR_list)
# The pickles produced by this script are written here, so make sure it exists.
data_partitions_dir = _ensure_dir(
    os.path.join(data_base_output_dir, *prj_consts.DATA_PARTITIONS_DIR_list))
ignored_images_set = set()

total_patient_number = 30805
NIH_annotated_file = 'BBox_List_2017.csv'  # exclude from train pathology annotated by radiologists
manually_selected_bad_images_file = 'blacklist.csv'  # exclude what visually looks like bad images

patient_id_original = list(range(1, total_patient_number + 1))

bbox_df = pd.read_csv(os.path.join(other_data_dir, NIH_annotated_file))
# Characters 3..7 of the image file name are parsed as the patient id.
bbox_patient_index_df = bbox_df['Image Index'].str.slice(3, 8)

# Plain iteration works on every pandas version (Series.iteritems() was
# removed in pandas 2.0).
bbox_patient_index_list = [int(item) for item in bbox_patient_index_df]

patient_id = list(set(patient_id_original) - set(bbox_patient_index_list))
print("len of original patient id is", len(patient_id_original))
print("len of cleaned patient id is", len(patient_id))
print("len of unique patient id with annotated data",
      len(set(bbox_patient_index_list)))
print("len of patient id with annotated data", bbox_df.shape[0])

random.seed(0)
random.shuffle(patient_id)

print("first ten patient ids are", patient_id[:10])

# training:valid:test=7:1:2 (cut points are fractions of total_patient_number)
train_end = int(total_patient_number * 0.7)
valid_end = int(total_patient_number * 0.8)
patient_id_train = patient_id[:train_end]
patient_id_valid = patient_id[train_end:valid_end]
# the rest of the patient ids, plus every radiologist-annotated patient, form the test set
patient_id_test = patient_id[valid_end:]
patient_id_test.extend(bbox_patient_index_list)
patient_id_test = list(set(patient_id_test))

print("train:{} valid:{} test:{}".format(len(patient_id_train), len(patient_id_valid), len(patient_id_test)))

pathologies_name_list = prj_consts.DISEASE_list
NIH_patients_and_labels_file = 'Data_Entry_2017.csv'

labels_df = pd.read_csv(os.path.join(other_data_dir, NIH_patients_and_labels_file))


# show the label distribution

# Counter tallies the distinct 'Finding Labels' strings in a single pass.
pathology_distribution = Counter(labels_df['Finding Labels'])

# Sort it by frequency (dict value)
sorted_by_freq = sorted(pathology_distribution.items(), key=lambda x: x[1], reverse=True)
print(len(sorted_by_freq))
print(sorted_by_freq[:20])
print(sorted_by_freq[-10:])

print(labels_df['Finding Labels'].str.split('|', expand=False).str.join(sep='*').str.get_dummies(sep='*').sum())


def _normalise_finding(text):
    """Return *text* lower-cased with underscores turned into spaces."""
    return str(text).replace('_', ' ').lower()


def process_data(current_df, patient_ids):
    """Collect image names and multi-hot disease labels for the given patients.

    Args:
        current_df: DataFrame with 'Patient ID', 'Image Index' and
            'Finding Labels' columns.
        patient_ids: iterable of patient ids whose images are wanted.

    Returns:
        (image_name_index, image_labels): the image names in patient order and
        a dict mapping each image name to a uint8 vector with one slot per
        entry of ``pathologies_name_list`` (1 = finding present).

    Images contained in ``ignored_images_set`` are skipped.
    """
    # Bucket the rows by patient once instead of filtering the whole frame
    # for every single patient id.
    rows_by_patient = {}
    for patient, image_name, findings in zip(current_df['Patient ID'],
                                             current_df['Image Index'],
                                             current_df['Finding Labels']):
        rows_by_patient.setdefault(patient, []).append((image_name, findings))

    # Match case-insensitively on an underscore-free spelling so that a list
    # entry such as 'Pleural Thickening' also hits 'Pleural_Thickening'.
    # NOTE(review): NIH's Data_Entry_2017.csv is understood to use the
    # underscore spelling -- confirm against the CSV.
    wanted = [_normalise_finding(name) for name in pathologies_name_list]

    image_name_index = []
    image_labels = {}
    for individual_patient in tqdm.tqdm(patient_ids):
        for image_name, findings in rows_by_patient.get(individual_patient, ()):
            if image_name in ignored_images_set:
                continue
            haystack = _normalise_finding(findings)
            image_name_index.append(image_name)
            image_labels[image_name] = np.array(
                [1 if name in haystack else 0 for name in wanted],
                dtype=np.uint8)
    return image_name_index, image_labels


train_data_index, train_labels = process_data(labels_df, patient_id_train)
valid_data_index, valid_labels = process_data(labels_df, patient_id_valid)
test_data_index, test_labels = process_data(labels_df, patient_id_test)

print("train, valid, test image number is:", len(train_data_index), len(valid_data_index), len(test_data_index))

# save the data
labels_all = {}
labels_all.update(train_labels)
labels_all.update(valid_labels)
labels_all.update(test_labels)

partition_dict = {'train': train_data_index, 'test': test_data_index, 'valid': valid_data_index}

with open(os.path.join(data_partitions_dir, 'labels14_unormalized_cleaned.pickle'), 'wb') as f:
    pickle.dump(labels_all, f)

with open(os.path.join(data_partitions_dir, 'partition14_unormalized_cleaned.pickle'), 'wb') as f:
    pickle.dump(partition_dict, f)

# also save the patient id partitions for pytorch training
with open(os.path.join(data_partitions_dir, 'train_test_valid_data_partitions.pickle'), 'wb') as f:
    pickle.dump([patient_id_train, patient_id_valid,
                 patient_id_test,
                 list(set(bbox_patient_index_list))], f)

print(type(train_labels))
print({k: train_labels[k] for k in list(train_labels)[:5]})

# ---- next file in this diff: Predict-Lung-Disease-master/020_evaluate.py ----
import sys, os
import azure_chestxray_utils
import azure_chestxray_keras_utils
from keras.models import load_model
import os
import pickle
import cv2
import numpy as np
import pandas as pd
from keras.models import load_model
from keras.utils import Sequence
from sklearn import metrics
from tensorflow.python.client import device_lib
import keras_contrib

# os.path.join keeps the location portable; on Windows it is the same
# directory as the former os.getcwd() + r'\azure-share'.
path = os.path.join(os.getcwd(), 'azure-share')
amlWBSharedDir = path

prj_consts = azure_chestxray_utils.chestxray_consts()
data_base_input_dir = os.path.join(amlWBSharedDir,
                                   *prj_consts.BASE_INPUT_DIR_list)
data_base_output_dir = os.path.join(amlWBSharedDir,
                                    *prj_consts.BASE_OUTPUT_DIR_list)
weights_dir = os.path.join(data_base_output_dir, *prj_consts.MODEL_WEIGHTS_DIR_list)
fully_trained_weights_dir = os.path.join(data_base_output_dir, *prj_consts.FULLY_PRETRAINED_MODEL_DIR_list)

nih_chest_xray_data_dir = os.path.join(data_base_input_dir,
                                       *prj_consts.ChestXray_IMAGES_DIR_list)

data_partitions_dir = os.path.join(data_base_output_dir,
                                   *prj_consts.DATA_PARTITIONS_DIR_list)

label_path = os.path.join(data_partitions_dir, 'labels14_unormalized_cleaned.pickle')
partition_path = os.path.join(data_partitions_dir, 'partition14_unormalized_cleaned.pickle')

# Choose the visible GPUs before the first call that makes Keras/TensorFlow
# touch a device.
# NOTE(review): these variables are presumably only honoured when set before
# CUDA is initialised, hence they now precede load_model() -- confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

model_file_name = 'azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5'
# Re-save the full model as a weights-only file; the evaluation loop below
# rebuilds the network and loads just these weights.
model = load_model(os.path.join(fully_trained_weights_dir, model_file_name))
model.save_weights(os.path.join(fully_trained_weights_dir, 'weights_only_' + model_file_name))
models_file_name = [os.path.join(fully_trained_weights_dir, 'weights_only_' + model_file_name)]

resized_height = 224
resized_width = 224
num_channel = 3
num_classes = 14
batch_size = 100  # 512


def get_available_gpus():
    """Return the device names of the GPUs TensorFlow reports.

    Returns:
        list of device-name strings; empty when no GPU device is listed.
    """
    return [device.name for device in device_lib.list_local_devices()
            if device.device_type == 'GPU']


# get number of available GPUs
num_gpu = len(get_available_gpus())
print("num of GPUs:", num_gpu)

pathologies_name_list = prj_consts.DISEASE_list

stanford_result = [0.8094, 0.9248, 0.8638, 0.7345, 0.8676, 0.7802, 0.7680, 0.8887, 0.7901, 0.8878, 0.9371, 0.8047,
                   0.8062, 0.9164]


# NOTE(review): pickle.load executes arbitrary code from the file; only load
# partitions produced by 000_preprocess.py or another trusted source.
with open(label_path, 'rb') as f:
    labels = pickle.load(f)

with open(partition_path, 'rb') as f:
    partition = pickle.load(f)


class DataGenSequence(Sequence):
    """Keras Sequence that serves fixed-size batches of resized images + labels.

    Only complete batches are produced: the trailing
    ``len(image_file_index) % batch_size`` images are never served.
    """

    def __init__(self, labels, image_file_index, current_state):
        """Store the label dict, the image names and the state tag.

        Args:
            labels: dict mapping image file name -> label vector.
            image_file_index: sequence of image file names to serve.
            current_state: free-form tag ('train', 'test', ...), only reported.
        """
        self.batch_size = batch_size
        self.labels = labels
        self.img_file_index = image_file_index
        self.current_state = current_state
        self.len = len(self.img_file_index) // self.batch_size
        print("for DataGenSequence", current_state, "total rows are:", len(self.img_file_index), ", len is", self.len)

    def __len__(self):
        """Return the number of complete batches."""
        return self.len

    def __getitem__(self, idx):
        """Load batch *idx* from disk.

        Returns:
            (X, y): images of shape (batch, height, width, channel) and the
            matching label rows.

        Raises:
            FileNotFoundError: when an image cannot be read by cv2.imread.
        """
        current_batch = self.img_file_index[idx * self.batch_size: (idx + 1) * self.batch_size]
        X = np.empty((self.batch_size, resized_height, resized_width, num_channel))
        y = np.empty((self.batch_size, num_classes))

        for i, image_name in enumerate(current_batch):
            image_path = os.path.join(nih_chest_xray_data_dir, image_name)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising
                raise FileNotFoundError("missing or unreadable image: " + image_path)
            # cv2.resize takes its target size as (width, height)
            X[i, :, :, :] = cv2.resize(img, (resized_width, resized_height)).astype(np.float16)
            y[i, :] = self.labels[image_name]

        # No augmentation is applied here, whatever current_state is
        # (this differs from the training code).
        return X, y


# Ground-truth labels for the test images, truncated to whole batches so the
# rows line up with what the generator feeds to the model.
test_image_names = partition['test']
num_eval_rows = len(test_image_names) - len(test_image_names) % batch_size
y_test = np.empty((num_eval_rows, num_classes), dtype=np.float16)
for i, npy in enumerate(test_image_names[:num_eval_rows]):
    y_test[i, :] = labels[npy]

print("len of result is", len(y_test))

# individual models
for current_model_file in models_file_name:
    print(current_model_file)
    model = azure_chestxray_keras_utils.build_model(keras_contrib.applications.densenet.DenseNetImageNet121)
    model.load_weights(current_model_file)
    print('evaluation for model', current_model_file)

    y_pred = model.predict_generator(generator=DataGenSequence(labels, partition['test'], current_state='test'),
                                     workers=32, verbose=1, max_queue_size=1)
    print("result shape", y_pred.shape)

    # add one fake row of ones in both test and pred values to avoid:
    # ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
    # The padded truth goes into a per-model array so y_test keeps its row
    # count when more than one model file is evaluated.
    y_true_eval = np.insert(y_test, 0, np.ones((y_test.shape[1],)), 0)
    y_pred = np.insert(y_pred, 0, np.ones((y_pred.shape[1],)), 0)

    df = pd.DataFrame(columns=['Disease', 'Our AUC Score', 'Stanford AUC Score'])
    for d, disease_name in enumerate(pathologies_name_list):
        df.loc[d] = [disease_name,
                     metrics.roc_auc_score(y_true_eval[:, d], y_pred[:, d]),
                     stanford_result[d]]

    df['Delta'] = df['Stanford AUC Score'] - df['Our AUC Score']
    df.to_csv(current_model_file + ".csv", index=False)
    print(df)

# ---- next file in this diff: Predict-Lung-Disease-master/040_cam_simple.py ----
import sys, os
import cv2
import matplotlib
import keras_contrib
import azure_chestxray_cam, azure_chestxray_utils, azure_chestxray_keras_utils
import keras_contrib
from keras.models import Model


# os.path.join keeps the location portable; on Windows it is the same
# directory as the former os.getcwd() + r'\azure-share'.
path = os.path.join(os.getcwd(), 'azure-share')
amlWBSharedDir = path

prj_consts = azure_chestxray_utils.chestxray_consts()

data_base_output_dir = os.path.join(amlWBSharedDir,
                                    *prj_consts.BASE_OUTPUT_DIR_list)
data_base_input_dir = os.path.join(amlWBSharedDir,
                                   *prj_consts.BASE_INPUT_DIR_list)

# "quality" models, fully trained on all training data
fully_trained_weights_dir = os.path.join(data_base_output_dir,
                                         *prj_consts.FULLY_PRETRAINED_MODEL_DIR_list)

test_images_dir = os.path.join(data_base_input_dir, 'test_images')

test_images = azure_chestxray_utils.get_files_in_dir(test_images_dir)

nih_chest_xray_data_dir = os.path.join(data_base_input_dir,
                                       *prj_consts.ChestXray_IMAGES_DIR_list)

chestXray_images = azure_chestxray_utils.get_files_in_dir(nih_chest_xray_data_dir)


model = azure_chestxray_keras_utils.build_model(keras_contrib.applications.densenet.DenseNetImageNet121)

# Default file name from the project constants; the script replaces it with
# the weights-only file name right afterwards.
model_file_name = prj_consts.PRETRAINED_DENSENET201_IMAGENET_CHESTXRAY_MODEL_FILE_NAME
prj_consts.PRETRAINED_DENSENET201_IMAGENET_CHESTXRAY_MODEL_FILE_NAME +model_file_name = 'weights_only_azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5' +model.load_weights(os.path.join(fully_trained_weights_dir, model_file_name)) + + +cv2_image = cv2.imread(os.path.join(test_images_dir,test_images[3])) + + + +predictions = model.predict(cv2_image[None,:,:,:]) +print(predictions) +conv_map_model = Model(inputs=model.input, outputs=model.get_layer(index=-3).output) +conv_features = conv_map_model.predict(cv2_image[None,:,:,:]) +conv_features = conv_features[0, :, :, :] #np.squeeze(conv_features) +class_weights = model.layers[-1].get_weights() + +cv2_image = cv2.imread(os.path.join(test_images_dir,test_images[3])) + +azure_chestxray_utils.print_image_stats_by_channel(cv2_image) +cv2_image = azure_chestxray_utils.normalize_nd_array(cv2_image) +cv2_image = 255*cv2_image +cv2_image=cv2_image.astype('uint8') +azure_chestxray_utils.print_image_stats_by_channel(cv2_image) + +predictions, cam_image, predicted_disease_index = \ +azure_chestxray_cam.get_score_and_cam_picture(cv2_image, model) +print(predictions) + +prj_consts.DISEASE_list[predicted_disease_index] +print('likely disease: ', prj_consts.DISEASE_list[predicted_disease_index]) +print('likely disease prob ratio: ', \ + predictions[predicted_disease_index]/sum(predictions)) + + + +NIH_annotated_nodules = ['00000706_000.png', '00000702_000.png'] +azure_chestxray_cam.process_nih_data(NIH_annotated_nodules, + nih_chest_xray_data_dir, model) \ No newline at end of file diff --git a/Predict-Lung-Disease-master/README.md b/Predict-Lung-Disease-master/README.md new file mode 100644 index 0000000..39bddb5 --- /dev/null +++ b/Predict-Lung-Disease-master/README.md @@ -0,0 +1,74 @@ +# Predict-Lung-Disease-through-Chest-X-Ray +We obtain this repository by refactoring the [code](https://github.com/Azure/AzureChestXRay) for the blog post [Using Microsoft AI to Build a Lung-Disease Prediction Model using Chest 
X-Ray Images](https://blogs.technet.microsoft.com/machinelearning/2018/03/07/using-microsoft-ai-to-build-a-lung-disease-prediction-model-using-chest-x-ray-images/). These instructions aim to help newcomers set up the system quickly. +# Installation +1. Clone this repository + ```Shell + git clone https://github.com/fatLime/Predict-Lung-Disease.git + ``` + We'll refer to the directory into which you cloned Predict-Lung-Disease as `ROOT`. + +2. Install the required third-party packages: tqdm, cv2 (OpenCV), numpy, pandas, sklearn, keras, tensorflow and keras_contrib. The scripts also use pickle, random, re and collections.Counter, which ship with the Python standard library. + +# Data setup +1. Download the NIH Chest X-ray Dataset from here: + https://nihcc.app.box.com/v/ChestXray-NIHCC. + You need all the image files (everything under the `images` folder of the NIH dataset), the `Data_Entry_2017.csv` file, and the bounding-box data `BBox_List_2017.csv`. + +2. Create the data directories + ```Shell + mkdir -p ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC + mkdir -p ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC_other + ``` +3. Save all images under `ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC` + +4. Save `Data_Entry_2017.csv` and `BBox_List_2017.csv` under `ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC_other` + +5. Process the data + ```Shell + mkdir -p ROOT/azure-share/chestxray/output/data_partitions + ``` + Run `000_preprocess.py` to create the `*.pickle` files under this directory. +# Test +1. The pretrained model `azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5` is provided under `ROOT/azure-share/chestxray/output/fully_trained_models`. You can also download it separately from [here](https://chestxray.blob.core.windows.net/chestxraytutorial/tutorial_xray/chexray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5). + +2. 
Run `020_evaluate.py`. It saves the weights of the pretrained model as `weights_only_azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5` in the same directory and writes the per-disease AUC scores to a `.csv` file next to it. + +3. Below are the AUC scores for all 14 diseases (Delta = Stanford AUC Score - Our AUC Score): + + | Disease | Our AUC Score | Stanford AUC Score | Delta + |--------------------|------------------|--------------------|-----------: + | Atelectasis | 0.822334 | 0.8094 | -0.012934 + | Cardiomegaly | 0.933610 | 0.9248 | -0.008810 + | Effusion | 0.882471 | 0.8638 | -0.018671 + | Infiltration | 0.744504 | 0.7345 | -0.010004 + | Mass | 0.858467 | 0.8676 | 0.009133 + | Nodule | 0.784230 | 0.7802 | -0.004030 + | Pneumonia | 0.800054 | 0.7680 | -0.032054 + | Pneumothorax | 0.829764 | 0.8887 | 0.058936 + | Consolidation | 0.811969 | 0.7901 | -0.021869 + | Edema | 0.894102 | 0.8878 | -0.006302 + | Emphysema | 0.847477 | 0.9371 | 0.089623 + | Fibrosis | 0.882602 | 0.8047 | -0.077902 + | Pleural Thickening | 1.000000 | 0.8062 | -0.193800 + | Hernia | 0.916610 | 0.9164 | -0.000210 + +# Visualization +1. Create the test-image folder + ```Shell + mkdir -p ROOT/azure-share/chestxray/data/ChestX-ray8/test_images + ``` + Copy any number of images from `ChestXray-NIHCC` to `test_images` and resize them to 224x224 pixels. + +2. Run `040_cam_simple.py` and it will output a Class Activation Map (CAM). The CAM shows which regions of the image were relevant to the predicted class. + + ![Example Class Activation Map output](https://github.com/fatLime/Predict-Lung-Disease/blob/master/image.png) + +# Referenced Papers +- Baseline result: https://arxiv.org/abs/1705.02315 +- Image Localization: http://arxiv.org/abs/1512.04150 +- The original CheXNet work, described on the [Stanford ML Group website](https://stanfordmlgroup.github.io/projects/chexnet/) and in their [paper](https://arxiv.org/abs/1711.05225). 
+- http://cs231n.stanford.edu/reports/2017/pdfs/527.pdf for pre-processing the data +- https://arxiv.org/abs/1711.08760 for some other thoughts on the model architecture and the relationship between different diseases + +# Notes + Please contact yanhaotian@bupt.edu.cn if you have any problem. diff --git a/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/data/ChestX-ray8/test_images/00000702_000.png b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/data/ChestX-ray8/test_images/00000702_000.png new file mode 100644 index 0000000..b541ee2 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/data/ChestX-ray8/test_images/00000702_000.png differ diff --git a/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/data/ChestX-ray8/test_images/00000706_000.png b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/data/ChestX-ray8/test_images/00000706_000.png new file mode 100644 index 0000000..10ebc70 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/data/ChestX-ray8/test_images/00000706_000.png differ diff --git a/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/labels14_unormalized_cleaned.pickle b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/labels14_unormalized_cleaned.pickle new file mode 100644 index 0000000..cdfa519 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/labels14_unormalized_cleaned.pickle differ diff --git a/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/partition14_unormalized_cleaned.pickle b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/partition14_unormalized_cleaned.pickle new file mode 100644 index 0000000..e7d2a81 Binary files /dev/null and 
b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/partition14_unormalized_cleaned.pickle differ diff --git a/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/train_test_valid_data_partitions.pickle b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/train_test_valid_data_partitions.pickle new file mode 100644 index 0000000..dc450cf Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/data_partitions/train_test_valid_data_partitions.pickle differ diff --git a/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/fully_trained_models/azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5 b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/fully_trained_models/azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5 new file mode 100644 index 0000000..ac6f4bf Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/azure-share/chestxray/output/fully_trained_models/azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5 differ diff --git a/Predict-Lung-Disease-master/azure-share/chestxray/data/ChestX-ray8/test_images/00000702_000.png b/Predict-Lung-Disease-master/azure-share/chestxray/data/ChestX-ray8/test_images/00000702_000.png new file mode 100644 index 0000000..b541ee2 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/chestxray/data/ChestX-ray8/test_images/00000702_000.png differ diff --git a/Predict-Lung-Disease-master/azure-share/chestxray/data/ChestX-ray8/test_images/00000706_000.png b/Predict-Lung-Disease-master/azure-share/chestxray/data/ChestX-ray8/test_images/00000706_000.png new file mode 100644 index 0000000..10ebc70 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/chestxray/data/ChestX-ray8/test_images/00000706_000.png differ diff --git 
a/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/labels14_unormalized_cleaned.pickle b/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/labels14_unormalized_cleaned.pickle new file mode 100644 index 0000000..cdfa519 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/labels14_unormalized_cleaned.pickle differ diff --git a/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/partition14_unormalized_cleaned.pickle b/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/partition14_unormalized_cleaned.pickle new file mode 100644 index 0000000..e7d2a81 Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/partition14_unormalized_cleaned.pickle differ diff --git a/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/train_test_valid_data_partitions.pickle b/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/train_test_valid_data_partitions.pickle new file mode 100644 index 0000000..dc450cf Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/chestxray/output/data_partitions/train_test_valid_data_partitions.pickle differ diff --git a/Predict-Lung-Disease-master/azure-share/chestxray/output/fully_trained_models/azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5 b/Predict-Lung-Disease-master/azure-share/chestxray/output/fully_trained_models/azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5 new file mode 100644 index 0000000..ac6f4bf Binary files /dev/null and b/Predict-Lung-Disease-master/azure-share/chestxray/output/fully_trained_models/azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5 differ diff --git a/Predict-Lung-Disease-master/image.png b/Predict-Lung-Disease-master/image.png new file mode 100644 index 0000000..12daa3d Binary files /dev/null and 
b/Predict-Lung-Disease-master/image.png differ diff --git a/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_cam.cpython-37.pyc b/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_cam.cpython-37.pyc new file mode 100644 index 0000000..6cb4f0b Binary files /dev/null and b/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_cam.cpython-37.pyc differ diff --git a/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_keras_utils.cpython-37.pyc b/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_keras_utils.cpython-37.pyc new file mode 100644 index 0000000..6a6392c Binary files /dev/null and b/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_keras_utils.cpython-37.pyc differ diff --git a/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_utils.cpython-37.pyc b/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_utils.cpython-37.pyc new file mode 100644 index 0000000..de31085 Binary files /dev/null and b/Predict-Lung-Disease-master/src/__pycache__/azure_chestxray_utils.cpython-37.pyc differ diff --git a/Predict-Lung-Disease-master/src/azure_chestxray_cam.py b/Predict-Lung-Disease-master/src/azure_chestxray_cam.py new file mode 100644 index 0000000..2235b1c --- /dev/null +++ b/Predict-Lung-Disease-master/src/azure_chestxray_cam.py @@ -0,0 +1,161 @@ +### Copyright (C) Microsoft Corporation. 
import keras.backend as K
import sys, os, io
import numpy as np
import cv2

import matplotlib
matplotlib.use('agg')

paths_to_append = [os.path.join(os.getcwd(), os.path.join(*(['Code', 'src'])))]


def add_path_to_sys_path(path_to_append):
    """Append *path_to_append* to ``sys.path`` unless it is already covered.

    The check is a substring test against every existing ``sys.path`` entry.
    """
    if not any(path_to_append in existing for existing in sys.path):
        sys.path.append(path_to_append)


for crt_path in paths_to_append:
    add_path_to_sys_path(crt_path)

import azure_chestxray_utils


def get_score_and_cam_picture(cv2_input_image, DenseNetImageNet121_model):
    """Run the model on one image and build its class activation map (CAM).

    Based on https://github.com/jacobgil/keras-cam/blob/master/cam.py

    Args:
        cv2_input_image: one image indexed [row, col, channel]; the batch axis
            is added here.
        DenseNetImageNet121_model: Keras model whose last layer provides the
            class weights and whose third-from-last layer is read as the final
            feature map.

    Returns:
        (prediction, cam, predicted_disease): the model output for the image,
        the 2-D float32 activation map of the arg-max class, and that class
        index.
    """
    model = DenseNetImageNet121_model
    class_weights = model.layers[-1].get_weights()[0]
    final_conv_layer = model.layers[-3]
    get_output = K.function([model.layers[0].input],
                            [final_conv_layer.output, model.layers[-1].output])
    conv_outputs, prediction = get_output([cv2_input_image[None, :, :, :]])
    conv_outputs = conv_outputs[0, :, :, :]
    prediction = prediction[0, :]

    # Create the class activation map: weight every feature channel by the
    # predicted class' weight and sum over the channel axis.
    predicted_disease = np.argmax(prediction)
    cam = np.dot(conv_outputs, class_weights[:, predicted_disease]).astype(np.float32)

    return prediction, cam, predicted_disease


def process_cam_image(crt_cam_image, xray_image, crt_alpha = .5):
    """Blend a class activation map over the x-ray it was computed from.

    Args:
        crt_cam_image: 2-D activation map.
        xray_image: 3-channel image indexed [row, col, channel].
        crt_alpha: weight of the x-ray in the blend; the map gets 1 - crt_alpha.

    Returns:
        uint8 image of the same height/width as *xray_image* with the map in
        the first channel blended over the x-ray.
    """
    im_height, im_width = xray_image.shape[:2]
    # cv2.resize takes its target size as (width, height), i.e. (cols, rows)
    crt_cam_image = cv2.resize(crt_cam_image, (im_width, im_height),
                               interpolation=cv2.INTER_CUBIC)

    # do some gamma enhancement, e is too much
    crt_cam_image = np.power(1.1, crt_cam_image)
    crt_cam_image = azure_chestxray_utils.normalize_nd_array(crt_cam_image)
    # cv2.merge needs all channels in the same dtype
    crt_cam_image = (255 * crt_cam_image).astype(np.float32)

    # make cam an rgb image
    empty_image_channel = np.zeros(dtype=np.float32, shape=crt_cam_image.shape[:2])
    crt_cam_image = cv2.merge((crt_cam_image, empty_image_channel, empty_image_channel))

    blended_image = cv2.addWeighted(xray_image.astype('uint8'), crt_alpha,
                                    crt_cam_image.astype('uint8'), (1 - crt_alpha), 0)
    return blended_image


def plot_cam_results(crt_blended_image, crt_cam_image, crt_xray_image, map_caption):
    """Show x-ray, activation map and blend side by side; return the blend plot.

    Args:
        crt_blended_image: output of process_cam_image.
        crt_cam_image: 2-D activation map.
        crt_xray_image: the original x-ray.
        map_caption: title for the blended panel.

    Returns:
        io.BytesIO positioned at 0 holding a PNG of the blended panel, padded
        in x and y.
    """
    import matplotlib
    matplotlib.use('TkAgg')
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(15, 7))

    ax1 = fig.add_subplot(2, 3, 1)
    ax1.imshow(crt_xray_image, cmap='gray', interpolation='bicubic')
    ax1.set_title('Orig X Ray')
    plt.axis('off')

    ax2 = fig.add_subplot(2, 3, 2)
    cam_plot = ax2.imshow(crt_cam_image, cmap=plt.get_cmap('OrRd'), interpolation='bicubic')
    plt.colorbar(cam_plot, ax=ax2)
    ax2.set_title('Activation Map')
    plt.axis('off')

    ax3 = fig.add_subplot(2, 3, 3)
    ax3.imshow(crt_blended_image, interpolation='bicubic')
    # the colour bar next to the blend reuses the activation map's scale
    plt.colorbar(cam_plot, ax=ax3)
    ax3.set_title(map_caption)
    plt.axis('off')

    plt.show()

    # serialize blended image plot padded in the x/y-direction
    image_as_BytesIO = io.BytesIO()
    x_direction_pad = 1.05
    y_direction_pad = 1.2
    extent = ax3.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
    fig.savefig(image_as_BytesIO,
                bbox_inches=extent.expanded(x_direction_pad, y_direction_pad),
                format='png')
    image_as_BytesIO.seek(0)
    return image_as_BytesIO


def process_xray_image(crt_xray_image, DenseNetImageNet121_model):
    """Score one x-ray, print the most likely disease and plot its CAM.

    Args:
        crt_xray_image: image array; it is rescaled to 0..255 uint8 first.
        DenseNetImageNet121_model: model passed to get_score_and_cam_picture.
    """
    crt_xray_image = azure_chestxray_utils.normalize_nd_array(crt_xray_image)
    crt_xray_image = (255 * crt_xray_image).astype('uint8')

    crt_predictions, crt_cam_image, predicted_disease_index = \
        get_score_and_cam_picture(crt_xray_image, DenseNetImageNet121_model)

    prj_consts = azure_chestxray_utils.chestxray_consts()
    likely_disease = prj_consts.DISEASE_list[predicted_disease_index]
    likely_disease_prob = 100 * crt_predictions[predicted_disease_index]
    # share of the top score in the sum of all class scores
    likely_disease_prob_ratio = 100 * crt_predictions[predicted_disease_index] / sum(crt_predictions)
    print('predictions: ', crt_predictions)
    print('likely disease: ', likely_disease)
    print('likely disease prob: ', likely_disease_prob)
    print('likely disease prob ratio: ', likely_disease_prob_ratio)

    crt_blended_image = process_cam_image(crt_cam_image, crt_xray_image)
    plot_cam_results(crt_blended_image, crt_cam_image, crt_xray_image,
                     str(likely_disease) + ' ' +
                     "{0:.1f}".format(likely_disease_prob) + '% (weight ' +
                     "{0:.1f}".format(likely_disease_prob_ratio) + '%)')


def process_nih_data(nih_data_files, NIH_data_dir, DenseNetImageNet121_model):
    """Run process_xray_image on each named file in *NIH_data_dir*.

    Args:
        nih_data_files: iterable of image file names.
        NIH_data_dir: directory containing those files.
        DenseNetImageNet121_model: model used for scoring.

    Raises:
        FileNotFoundError: when a file cannot be read by cv2.imread.
    """
    prj_consts = azure_chestxray_utils.chestxray_consts()
    # cv2.resize takes its target size as (width, height)
    target_size = (prj_consts.CHESTXRAY_MODEL_EXPECTED_IMAGE_WIDTH,
                   prj_consts.CHESTXRAY_MODEL_EXPECTED_IMAGE_HEIGHT)

    for crt_image in nih_data_files:
        image_path = os.path.join(NIH_data_dir, crt_image)
        crt_xray_image = cv2.imread(image_path)
        if crt_xray_image is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError("missing or unreadable image: " + image_path)
        crt_xray_image = cv2.resize(crt_xray_image, target_size).astype(np.float32)

        process_xray_image(crt_xray_image, DenseNetImageNet121_model)


if __name__=="__main__":
    #FIXME
    # add example/test code here
    import keras_contrib
    import azure_chestxray_keras_utils

    NIH_annotated_Cardiomegaly = ['00005066_030.png']
    data_dir = ''
    image_path = os.path.join(data_dir, NIH_annotated_Cardiomegaly[0])
    cv2_image = cv2.imread(image_path)
    if cv2_image is None:
        raise FileNotFoundError("missing or unreadable image: " + image_path)

    azure_chestxray_utils.print_image_stats_by_channel(cv2_image)
    cv2_image = azure_chestxray_utils.normalize_nd_array(cv2_image)
    cv2_image = 255*cv2_image
    cv2_image = cv2_image.astype('uint8')
    azure_chestxray_utils.print_image_stats_by_channel(cv2_image)

    # `model` used to be undefined at this point; build it the way
    # 040_cam_simple.py does.
    # NOTE(review): load fine-tuned chest x-ray weights via model.load_weights(...)
    # before trusting the predictions.
    model = azure_chestxray_keras_utils.build_model(
        keras_contrib.applications.densenet.DenseNetImageNet121)

    predictions, cam_image, predicted_disease_index = get_score_and_cam_picture(cv2_image, model)
    print(predictions)
    prj_consts = azure_chestxray_utils.chestxray_consts()
    print(prj_consts.DISEASE_list[predicted_disease_index])
    print('likely disease: ', prj_consts.DISEASE_list[predicted_disease_index])
    print('likely disease prob ratio: ',
          predictions[predicted_disease_index]/sum(predictions))
    blended_image = process_cam_image(cam_image, cv2_image)
    plot_cam_results(blended_image, cam_image, cv2_image,
                     prj_consts.DISEASE_list[predicted_disease_index])

# ---- next file in this diff: Predict-Lung-Disease-master/src/azure_chestxray_keras_utils.py ----
### Copyright (C) Microsoft Corporation.
from keras.layers import Dense
from keras.models import Model
from keras_contrib.applications.densenet import DenseNetImageNet121
import keras_contrib


def build_model(crt_densenet_function, input_shape=(224, 224, 3),
                num_classes=14, weights='imagenet'):
    """Build a DenseNet backbone topped with a sigmoid classification layer.

    Args:
        crt_densenet_function: DenseNet constructor to call, e.g.
            DenseNetImageNet121; it must accept the keyword arguments
            input_shape, weights, include_top and pooling.
        input_shape: shape handed to the backbone constructor
            (default (224, 224, 3)).
        num_classes: number of units of the final Dense layer (default 14).
        weights: value of the backbone's ``weights`` argument
            (default 'imagenet').

    Returns:
        A keras Model mapping the backbone input to ``num_classes``
        sigmoid outputs.
    """
    # headless backbone with global average pooling
    base_model = crt_densenet_function(input_shape=input_shape,
                                       weights=weights,
                                       include_top=False,
                                       pooling='avg')

    predictions = Dense(num_classes, activation='sigmoid')(base_model.output)
    return Model(inputs=base_model.input, outputs=predictions)


if __name__ == "__main__":
    # summary() prints by itself and returns None, so it is not wrapped in print()
    model = build_model(DenseNetImageNet121)
    model.summary()
    model = build_model(keras_contrib.applications.densenet.DenseNetImageNet201)
    model.summary()
import os
import numpy as np


class chestxray_consts(object):
    """Read-only container for project-wide constants.

    All values are class attributes; assigning to an attribute of an
    instance raises TypeError.
    """
    # label names, in the order used to index model predictions elsewhere
    DISEASE_list = ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia',
                    'Pneumothorax',
                    'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural Thickening', 'Hernia']

    # NOTE(review): the constant name says DENSENET201 while the file name
    # contains "121layer" -- confirm which architecture the weights belong to.
    PRETRAINED_DENSENET201_IMAGENET_CHESTXRAY_MODEL_FILE_NAME = 'chexnet_14_weights_multigpu_contribmodel_121layer_712split_epoch_011_val_loss_153.9783.hdf5'
    FULLY_PRETRAINED_MODEL_DIR_list = ['fully_trained_models']

    CHESTXRAY_MODEL_EXPECTED_IMAGE_HEIGHT = 224
    CHESTXRAY_MODEL_EXPECTED_IMAGE_WIDTH = 224

    # directory locations stored as lists of path components
    BASE_INPUT_DIR_list = ['chestxray', 'data', 'ChestX-ray8']
    BASE_OUTPUT_DIR_list = ['chestxray', 'output']
    CREDENTIALS_DIR_list = ['code', 'notShared']

    SRC_DIR_list = ['Code', 'src']
    ChestXray_IMAGES_DIR_list = ['ChestXray-NIHCC']
    ChestXray_OTHER_DATA_DIR_list = ['ChestXray-NIHCC_other']
    PROCESSED_IMAGES_DIR_list = ['processed_npy14']
    DATA_PARTITIONS_DIR_list = ['data_partitions']
    MODEL_WEIGHTS_DIR_list = ['weights_tmpdir']

    def __setattr__(self, *_):
        # instances are meant to be immutable
        raise TypeError('chestxray_consts attributes are read-only')


# os agnostic 'ls' function
def get_files_in_dir(crt_dir):
    """Return the names of the regular files directly inside ``crt_dir``.

    Sub-directories are left out; the order is whatever os.listdir yields.
    """
    return [f for f in os.listdir(crt_dir)
            if os.path.isfile(os.path.join(crt_dir, f))]


def normalize_nd_array(crt_array):
    """Min-max normalise an array to the range [0, 1].

    Args:
        crt_array: array-like of numbers.

    Returns:
        A floating point array of the same shape. Floating point inputs
        keep their dtype, everything else is computed in float64. A
        constant input (zero value range) yields all zeros instead of the
        NaNs a 0/0 division would produce.

    Raises:
        ValueError: if ``crt_array`` is empty (raised by np.min).
    """
    crt_array = np.asarray(crt_array)
    if not np.issubdtype(crt_array.dtype, np.floating):
        # avoids integer wrap-around and makes bool input usable
        crt_array = crt_array.astype(np.float64)

    shifted = crt_array - np.min(crt_array)
    value_range = np.max(shifted)  # equals np.ptp because the minimum is now 0
    if value_range == 0:
        return np.zeros_like(shifted)
    return shifted / value_range


def print_image_stats_by_channel(crt_image):
    """Print the per-channel minimum and maximum of an image.

    Args:
        crt_image: array indexed as [row, column, channel]; a 2-D array is
            treated as a single-channel image.
    """
    crt_image = np.asarray(crt_image)
    if crt_image.ndim == 2:
        crt_image = crt_image[:, :, np.newaxis]
    print('min:')
    print(*np.amin(crt_image, axis=(0, 1)))
    print('max:')
    print(*np.amax(crt_image, axis=(0, 1)))


if __name__ == "__main__":
    prj_consts = chestxray_consts()
    print('model_expected_image_height = ', prj_consts.CHESTXRAY_MODEL_EXPECTED_IMAGE_HEIGHT)
    print('model_expected_image_width = ', prj_consts.CHESTXRAY_MODEL_EXPECTED_IMAGE_WIDTH)
b/Predict-Lung-Disease-master/src/finding_lungs/blacklist_non_PA_AP_view.csv new file mode 100644 index 0000000..e986dcf --- /dev/null +++ b/Predict-Lung-Disease-master/src/finding_lungs/blacklist_non_PA_AP_view.csv @@ -0,0 +1,56 @@ +00000591_003.png +00001136_001.png +00001153_005.png +00001602_000.png +00001803_003.png +00002097_000.png +00002117_003.png +00002354_000.png +00002592_003.png +00002639_009.png +00003023_000.png +00003094_002.png +00004533_004.png +00004808_001.png +00004906_000.png +00005192_001.png +00005260_000.png +00005286_001.png +00006462_008.png +00006836_008.png +00006851_004.png +00007113_001.png +00007152_006.png +00007160_002.png +00007454_001.png +00007482_010.png +00007716_007.png +00008016_000.png +00008082_000.png +00009198_002.png +00009368_010.png +00009368_011.png +00009584_002.png +00009889_038.png +00010007_121.png +00010065_000.png +00012249_001.png +00012388_002.png +00012907_007.png +00013160_000.png +00013670_137.png +00013714_001.png +00014294_015.png +00014675_034.png +00014963_000.png +00015054_000.png +00015078_007.png +00016233_004.png +00016637_000.png +00017753_022.png +00017915_003.png +00020373_002.png +00020644_000.png +00025381_004.png +00026806_000.png +00029476_000.png diff --git a/Predict-Lung-Disease-master/src/finding_lungs/blacklist_other_images_with_lower_quality.csv b/Predict-Lung-Disease-master/src/finding_lungs/blacklist_other_images_with_lower_quality.csv new file mode 100644 index 0000000..eaf990a --- /dev/null +++ b/Predict-Lung-Disease-master/src/finding_lungs/blacklist_other_images_with_lower_quality.csv @@ -0,0 +1,1094 @@ +00000032_013.png +00000032_023.png +00000032_024.png +00000032_055.png +00000032_058.png +00000116_007.png +00000244_000.png +00000244_002.png +00000248_013.png +00000248_018.png +00000248_019.png +00000467_013.png +00000468_060.png +00000565_000.png +00000583_005.png +00000583_007.png +00000583_009.png +00000583_019.png +00000583_024.png +00000627_030.png +00000627_036.png 
+00000703_000.png +00000831_008.png +00000929_000.png +00000929_001.png +00000980_004.png +00001029_003.png +00001075_016.png +00001075_020.png +00001122_016.png +00001122_017.png +00001153_006.png +00001157_002.png +00001179_000.png +00001179_001.png +00001181_000.png +00001223_000.png +00001223_001.png +00001249_004.png +00001255_012.png +00001255_035.png +00001267_001.png +00001278_009.png +00001278_011.png +00001437_038.png +00001501_002.png +00001564_000.png +00001577_003.png +00001595_000.png +00001595_001.png +00001595_002.png +00001686_000.png +00001686_001.png +00001736_005.png +00001736_007.png +00001736_010.png +00001736_014.png +00001736_018.png +00001736_021.png +00001736_025.png +00001736_026.png +00001736_027.png +00001787_002.png +00001787_010.png +00001814_004.png +00001836_014.png +00001855_000.png +00001855_004.png +00001855_009.png +00001855_010.png +00001855_011.png +00001855_012.png +00001855_014.png +00001855_016.png +00001855_018.png +00001855_020.png +00001855_021.png +00001855_022.png +00001855_023.png +00001855_024.png +00001855_025.png +00001855_026.png +00001855_027.png +00001855_028.png +00001855_029.png +00001855_030.png +00001855_032.png +00001855_033.png +00001855_034.png +00001855_035.png +00001855_037.png +00001952_000.png +00001952_001.png +00001952_002.png +00001952_007.png +00001952_008.png +00001986_010.png +00002072_003.png +00002072_004.png +00002072_009.png +00002072_010.png +00002072_011.png +00002072_014.png +00002072_015.png +00002072_018.png +00002072_019.png +00002208_001.png +00002359_018.png +00002366_001.png +00002366_002.png +00002437_036.png +00002437_037.png +00002529_007.png +00002529_023.png +00002529_025.png +00002529_030.png +00002545_001.png +00002582_007.png +00002594_001.png +00002633_023.png +00002636_000.png +00002659_003.png +00002675_005.png +00002733_000.png +00002763_023.png +00002763_024.png +00002892_004.png +00002896_000.png +00003004_000.png +00003005_005.png +00003029_018.png +00003059_000.png 
+00003060_000.png +00003094_000.png +00003094_003.png +00003094_004.png +00003094_005.png +00003158_001.png +00003186_003.png +00003369_001.png +00003465_000.png +00003465_001.png +00003465_002.png +00003465_003.png +00003465_004.png +00003465_005.png +00003465_006.png +00003465_007.png +00003465_008.png +00003523_036.png +00004276_000.png +00004285_000.png +00004309_006.png +00004344_025.png +00004360_020.png +00004360_023.png +00004472_000.png +00004545_000.png +00004660_000.png +00004672_001.png +00004703_000.png +00004706_001.png +00004792_000.png +00004808_014.png +00004808_094.png +00004811_000.png +00004928_006.png +00005051_000.png +00005094_009.png +00005201_001.png +00005204_001.png +00005220_012.png +00005220_015.png +00005254_003.png +00005254_004.png +00005254_008.png +00005298_013.png +00005360_002.png +00005573_004.png +00005699_005.png +00005712_008.png +00005746_008.png +00005748_000.png +00005750_015.png +00005750_016.png +00005750_017.png +00005877_000.png +00005937_000.png +00005975_001.png +00006008_015.png +00006015_000.png +00006015_003.png +00006039_022.png +00006054_001.png +00006127_000.png +00006209_001.png +00006220_002.png +00006220_003.png +00006220_004.png +00006220_005.png +00006220_006.png +00006220_009.png +00006271_002.png +00006271_078.png +00006271_093.png +00006294_004.png +00006296_011.png +00006381_009.png +00006391_001.png +00006415_000.png +00006446_012.png +00006585_007.png +00006754_008.png +00006838_000.png +00006850_019.png +00006870_000.png +00006904_007.png +00006906_029.png +00006906_031.png +00006906_032.png +00006917_000.png +00006960_022.png +00007001_001.png +00007018_034.png +00007018_035.png +00007108_006.png +00007152_008.png +00007217_005.png +00007269_000.png +00007276_001.png +00007276_002.png +00007322_003.png +00007322_005.png +00007322_009.png +00007322_020.png +00007371_000.png +00007438_000.png +00007500_000.png +00007500_001.png +00007545_000.png +00007558_004.png +00007558_007.png +00007624_036.png 
+00007830_000.png +00007830_001.png +00007830_004.png +00007830_005.png +00007830_007.png +00007830_010.png +00007973_000.png +00007985_000.png +00008051_039.png +00008051_050.png +00008051_051.png +00008295_010.png +00008297_008.png +00008297_013.png +00008297_016.png +00008314_000.png +00008463_001.png +00008522_057.png +00008549_000.png +00008640_000.png +00008701_008.png +00008911_006.png +00008993_000.png +00009218_020.png +00009218_022.png +00009282_000.png +00009465_004.png +00009508_004.png +00009551_008.png +00009551_022.png +00009573_000.png +00009608_045.png +00009613_005.png +00009621_000.png +00009621_001.png +00009621_002.png +00009621_003.png +00009621_004.png +00009621_005.png +00009621_006.png +00009621_007.png +00009702_006.png +00009727_012.png +00009727_013.png +00009727_014.png +00009727_018.png +00009727_019.png +00009727_020.png +00009727_022.png +00009727_023.png +00009727_027.png +00009727_028.png +00009876_002.png +00009886_000.png +00009892_007.png +00009892_046.png +00009911_004.png +00009953_016.png +00010007_053.png +00010007_060.png +00010007_071.png +00010007_074.png +00010007_082.png +00010007_103.png +00010012_018.png +00010012_026.png +00010092_007.png +00010092_043.png +00010124_000.png +00010294_007.png +00010352_054.png +00010352_074.png +00010360_004.png +00010384_005.png +00010405_000.png +00010405_001.png +00010415_000.png +00010435_002.png +00010544_016.png +00010544_027.png +00010544_030.png +00010693_027.png +00010698_001.png +00010698_013.png +00010761_000.png +00010773_014.png +00010773_025.png +00010790_039.png +00010790_043.png +00010790_045.png +00010792_004.png +00010805_002.png +00010805_003.png +00010805_004.png +00010805_005.png +00010805_006.png +00010805_008.png +00010805_009.png +00010805_010.png +00010805_011.png +00010805_013.png +00010805_015.png +00010805_016.png +00010805_017.png +00010805_018.png +00010805_019.png +00010805_020.png +00010805_023.png +00010805_025.png +00010805_037.png +00010805_038.png 
+00010805_040.png +00010805_043.png +00010805_045.png +00010805_046.png +00010805_047.png +00010805_048.png +00010805_050.png +00010828_017.png +00010843_000.png +00010887_027.png +00010960_001.png +00010960_002.png +00010995_006.png +00010995_008.png +00011007_000.png +00011021_012.png +00011064_000.png +00011164_007.png +00011237_095.png +00011237_108.png +00011379_002.png +00011379_003.png +00011379_004.png +00011379_005.png +00011379_006.png +00011379_013.png +00011379_018.png +00011379_019.png +00011379_022.png +00011379_039.png +00011379_041.png +00011379_043.png +00011379_045.png +00011379_046.png +00011379_047.png +00011386_000.png +00011391_016.png +00011391_031.png +00011391_032.png +00011391_039.png +00011391_041.png +00011391_043.png +00011391_047.png +00011391_055.png +00011436_009.png +00011461_002.png +00011553_002.png +00011553_003.png +00011553_004.png +00011553_005.png +00011553_006.png +00011553_007.png +00011553_009.png +00011553_010.png +00011553_011.png +00011553_012.png +00011553_013.png +00011553_014.png +00011553_015.png +00011553_016.png +00011553_017.png +00011553_018.png +00011553_019.png +00011553_020.png +00011553_022.png +00011553_023.png +00011553_024.png +00011553_025.png +00011553_026.png +00011553_027.png +00011553_028.png +00011553_029.png +00011553_030.png +00011553_031.png +00011553_032.png +00011553_033.png +00011553_034.png +00011553_035.png +00011553_036.png +00011553_037.png +00011553_038.png +00011553_040.png +00011553_041.png +00011553_046.png +00011553_047.png +00011673_000.png +00011677_001.png +00011677_002.png +00011702_024.png +00011702_062.png +00011731_003.png +00011769_000.png +00011925_047.png +00011925_049.png +00011925_051.png +00011925_053.png +00011925_055.png +00011925_068.png +00011925_071.png +00011925_078.png +00011947_000.png +00011985_008.png +00012141_013.png +00012159_002.png +00012162_001.png +00012276_007.png +00012276_009.png +00012276_010.png +00012276_013.png +00012276_017.png +00012276_018.png 
+00012368_002.png +00012470_011.png +00012470_012.png +00012515_002.png +00012591_000.png +00012605_000.png +00012605_001.png +00012628_017.png +00012628_060.png +00012648_001.png +00012654_001.png +00012662_000.png +00012742_000.png +00012742_001.png +00012742_002.png +00012798_000.png +00012834_005.png +00012834_007.png +00012834_010.png +00012834_085.png +00012834_120.png +00012834_137.png +00012863_027.png +00012863_039.png +00013049_006.png +00013049_007.png +00013123_004.png +00013152_004.png +00013158_004.png +00013249_004.png +00013249_006.png +00013249_007.png +00013249_008.png +00013249_013.png +00013249_014.png +00013249_017.png +00013249_018.png +00013249_028.png +00013249_033.png +00013249_036.png +00013249_038.png +00013249_041.png +00013249_046.png +00013401_000.png +00013440_000.png +00013499_004.png +00013568_000.png +00013601_013.png +00013608_000.png +00013608_002.png +00013608_004.png +00013608_016.png +00013615_015.png +00013615_025.png +00013615_027.png +00013615_049.png +00013615_057.png +00013615_060.png +00013625_033.png +00013641_014.png +00013641_041.png +00013670_146.png +00013670_162.png +00013670_163.png +00013670_166.png +00013670_167.png +00013685_047.png +00013774_027.png +00013774_041.png +00013774_042.png +00013774_048.png +00013894_010.png +00013894_024.png +00013894_025.png +00013894_027.png +00013896_004.png +00013922_020.png +00013922_021.png +00013966_007.png +00013993_016.png +00013993_049.png +00013993_099.png +00014004_018.png +00014004_023.png +00014014_002.png +00014014_005.png +00014080_001.png +00014112_019.png +00014128_023.png +00014192_000.png +00014203_016.png +00014203_026.png +00014203_028.png +00014203_029.png +00014203_042.png +00014203_044.png +00014223_012.png +00014245_001.png +00014245_003.png +00014314_001.png +00014320_040.png +00014320_043.png +00014323_001.png +00014323_002.png +00014323_003.png +00014332_004.png +00014351_000.png +00014352_001.png +00014465_016.png +00014474_002.png +00014486_004.png 
+00014509_000.png +00014958_009.png +00014982_000.png +00015007_002.png +00015007_003.png +00015007_005.png +00015007_006.png +00015007_007.png +00015007_008.png +00015007_011.png +00015024_003.png +00015031_006.png +00015031_022.png +00015041_003.png +00015112_004.png +00015126_000.png +00015151_001.png +00015193_014.png +00015213_000.png +00015290_000.png +00015391_001.png +00015462_001.png +00015462_002.png +00015482_000.png +00015530_071.png +00015530_142.png +00015564_011.png +00015605_038.png +00015605_051.png +00015605_053.png +00015605_055.png +00015606_013.png +00015606_050.png +00015696_001.png +00015758_000.png +00015826_019.png +00015923_000.png +00015934_000.png +00015986_000.png +00015996_001.png +00016009_046.png +00016034_003.png +00016051_003.png +00016051_004.png +00016133_000.png +00016175_003.png +00016175_006.png +00016175_008.png +00016184_027.png +00016238_006.png +00016292_000.png +00016292_001.png +00016292_002.png +00016292_003.png +00016292_004.png +00016378_001.png +00016410_006.png +00016410_008.png +00016410_055.png +00016484_001.png +00016484_005.png +00016484_009.png +00016484_011.png +00016484_026.png +00016522_019.png +00016529_000.png +00016638_003.png +00016638_004.png +00016653_000.png +00016732_035.png +00016784_002.png +00016860_001.png +00016860_005.png +00016867_003.png +00016918_005.png +00017036_023.png +00017110_012.png +00017138_032.png +00017207_002.png +00017207_003.png +00017207_008.png +00017258_022.png +00017258_023.png +00017362_009.png +00017392_000.png +00017400_000.png +00017403_007.png +00017403_010.png +00017424_034.png +00017424_035.png +00017424_036.png +00017424_038.png +00017424_041.png +00017425_002.png +00017425_006.png +00017477_000.png +00017504_024.png +00017504_068.png +00017538_001.png +00017538_002.png +00017541_025.png +00017553_000.png +00017561_001.png +00017605_014.png +00017606_020.png +00017618_013.png +00017625_000.png +00017625_004.png +00017641_004.png +00017645_013.png +00017648_000.png 
+00017695_000.png +00017753_026.png +00017817_001.png +00017817_002.png +00017927_001.png +00017941_005.png +00017972_006.png +00017972_014.png +00017979_000.png +00017999_000.png +00018011_015.png +00018044_020.png +00018044_036.png +00018044_040.png +00018044_043.png +00018069_000.png +00018069_001.png +00018091_012.png +00018103_002.png +00018103_007.png +00018103_009.png +00018104_004.png +00018116_000.png +00018121_000.png +00018125_009.png +00018126_024.png +00018175_002.png +00018191_000.png +00018191_001.png +00018213_001.png +00018240_000.png +00018251_001.png +00018251_002.png +00018251_003.png +00018251_004.png +00018251_005.png +00018251_006.png +00018251_007.png +00018251_008.png +00018251_009.png +00018251_010.png +00018251_011.png +00018251_012.png +00018251_013.png +00018251_014.png +00018253_089.png +00018336_000.png +00018437_001.png +00018437_002.png +00018445_002.png +00018458_000.png +00018486_000.png +00018571_000.png +00018573_000.png +00018598_004.png +00018610_002.png +00018610_004.png +00018614_001.png +00018615_001.png +00018778_001.png +00018778_002.png +00018778_005.png +00018921_026.png +00018921_027.png +00018927_000.png +00018949_001.png +00019020_000.png +00019045_000.png +00019107_001.png +00019124_011.png +00019124_012.png +00019150_007.png +00019301_000.png +00019390_002.png +00019390_004.png +00019534_000.png +00019576_024.png +00019576_063.png +00019576_064.png +00019576_065.png +00019587_000.png +00019592_010.png +00019660_001.png +00019707_010.png +00019888_001.png +00019928_000.png +00019967_001.png +00019967_002.png +00019967_003.png +00019967_004.png +00019967_007.png +00019967_008.png +00019967_009.png +00019967_011.png +00019967_012.png +00019967_013.png +00019967_014.png +00019967_017.png +00019967_019.png +00019967_020.png +00019967_032.png +00020006_001.png +00020108_001.png +00020110_000.png +00020146_002.png +00020213_011.png +00020213_018.png +00020213_060.png +00020213_061.png +00020213_113.png +00020219_000.png 
+00020326_013.png +00020326_058.png +00020348_000.png +00020364_002.png +00020364_003.png +00020398_010.png +00020438_007.png +00020622_002.png +00020631_009.png +00020928_004.png +00020928_014.png +00020928_015.png +00020945_022.png +00021023_014.png +00021044_000.png +00021108_000.png +00021201_042.png +00021420_013.png +00021420_028.png +00021481_012.png +00021506_001.png +00021508_002.png +00021510_000.png +00021572_010.png +00021695_003.png +00021700_006.png +00021729_000.png +00021770_012.png +00021770_014.png +00021770_015.png +00021770_016.png +00021811_003.png +00021835_029.png +00021901_005.png +00021917_000.png +00021942_006.png +00021990_002.png +00022010_001.png +00022051_000.png +00022174_000.png +00022174_001.png +00022245_011.png +00022283_029.png +00022339_000.png +00022416_052.png +00022470_007.png +00022486_000.png +00022523_004.png +00022523_005.png +00022524_000.png +00022528_007.png +00022566_022.png +00022599_004.png +00022714_000.png +00022723_000.png +00022725_003.png +00022727_001.png +00022815_004.png +00022815_015.png +00022815_020.png +00022815_031.png +00022815_037.png +00022815_058.png +00022815_068.png +00022815_073.png +00022815_079.png +00022872_001.png +00022872_002.png +00022975_004.png +00023027_000.png +00023068_015.png +00023129_000.png +00023160_003.png +00023176_019.png +00023192_000.png +00023195_000.png +00023197_000.png +00023254_003.png +00023271_016.png +00023325_037.png +00023325_039.png +00025066_000.png +00025203_000.png +00025223_000.png +00025290_014.png +00025445_001.png +00025513_001.png +00025513_005.png +00025513_006.png +00025513_007.png +00025513_008.png +00025513_009.png +00025513_010.png +00025513_011.png +00025513_012.png +00025513_013.png +00025513_014.png +00025529_010.png +00025628_024.png +00025628_026.png +00025628_027.png +00025664_037.png +00025665_000.png +00025691_000.png +00025691_002.png +00025697_001.png +00025704_000.png +00025796_000.png +00025809_001.png +00025839_010.png +00025839_012.png 
+00025932_001.png +00025958_000.png +00025958_002.png +00025958_003.png +00025958_006.png +00026068_000.png +00026068_001.png +00026092_003.png +00026098_028.png +00026099_041.png +00026114_000.png +00026115_000.png +00026159_000.png +00026167_008.png +00026194_001.png +00026194_004.png +00026194_007.png +00026194_008.png +00026194_009.png +00026194_010.png +00026194_011.png +00026194_012.png +00026194_014.png +00026194_015.png +00026194_018.png +00026232_030.png +00026262_000.png +00026346_015.png +00026349_005.png +00026382_009.png +00026431_000.png +00026474_003.png +00026506_000.png +00026538_025.png +00026621_000.png +00026634_000.png +00026666_000.png +00026701_000.png +00026758_000.png +00026801_005.png +00026818_020.png +00026867_002.png +00026867_004.png +00026911_004.png +00026925_006.png +00026925_011.png +00026925_015.png +00026963_032.png +00026971_026.png +00026993_003.png +00026993_004.png +00027072_000.png +00027196_009.png +00027196_010.png +00027213_001.png +00027213_008.png +00027213_009.png +00027213_010.png +00027213_076.png +00027213_079.png +00027299_006.png +00027299_007.png +00027415_009.png +00027415_011.png +00027415_028.png +00027415_029.png +00027415_037.png +00027415_046.png +00027415_047.png +00027415_049.png +00027415_059.png +00027415_068.png +00027415_069.png +00027415_072.png +00027415_073.png +00027415_075.png +00027415_077.png +00027441_012.png +00027441_017.png +00027441_019.png +00027441_024.png +00027442_008.png +00027464_024.png +00027465_008.png +00027524_000.png +00027618_012.png +00027623_006.png +00027639_000.png +00027639_001.png +00027639_002.png +00027639_003.png +00027677_000.png +00027710_000.png +00027725_021.png +00027725_035.png +00027726_016.png +00027726_019.png +00027726_020.png +00027726_021.png +00027726_050.png +00027726_051.png +00027765_000.png +00027765_002.png +00027952_004.png +00027981_000.png +00027981_001.png +00027981_002.png +00028076_000.png +00028092_000.png +00028201_000.png +00028211_012.png 
+00028301_002.png +00028341_001.png +00028341_002.png +00028341_003.png +00028341_004.png +00028341_005.png +00028341_006.png +00028341_007.png +00028341_008.png +00028341_009.png +00028341_010.png +00028341_011.png +00028341_012.png +00028389_000.png +00028450_000.png +00028454_011.png +00028454_013.png +00028474_000.png +00028657_000.png +00028799_000.png +00028829_002.png +00028873_017.png +00028873_019.png +00028873_020.png +00028882_004.png +00028961_006.png +00028961_008.png +00028996_002.png +00028996_003.png +00028996_004.png +00029174_002.png +00029222_003.png +00029235_001.png +00029245_002.png +00029276_004.png +00029404_000.png +00029404_002.png +00029404_003.png +00029404_004.png +00029404_005.png +00029404_006.png +00029404_007.png +00029404_008.png +00029404_010.png +00029476_003.png +00029596_012.png +00029627_000.png +00029813_029.png +00029943_022.png +00030079_020.png +00030079_031.png +00030206_011.png +00030209_011.png +00030213_000.png +00030245_001.png +00030320_004.png +00030320_006.png +00030323_038.png +00030410_005.png +00030412_002.png +00030609_000.png +00030609_001.png +00030609_002.png +00030609_003.png +00030609_006.png +00030609_008.png +00030609_009.png +00030609_010.png +00030609_011.png +00030609_017.png +00030609_021.png +00030609_023.png +00030609_026.png +00030786_004.png diff --git a/Predict-Lung-Disease-master/src/finding_lungs/blacklist_rotated_images.csv b/Predict-Lung-Disease-master/src/finding_lungs/blacklist_rotated_images.csv new file mode 100644 index 0000000..c8a4148 --- /dev/null +++ b/Predict-Lung-Disease-master/src/finding_lungs/blacklist_rotated_images.csv @@ -0,0 +1,18 @@ +00001255_007.png +00001814_001.png +00002180_000.png +00002815_003.png +00003693_005.png +00005823_000.png +00007188_002.png +00008051_036.png +00008468_003.png +00009889_023.png +00009984_001.png +00011460_066.png +00013299_000.png +00013431_000.png +00017258_011.png +00017606_037.png +00019620_001.png +00026701_001.png diff --git 
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import pandas as pd
from keras.models import load_model
from skimage import exposure, img_as_float
from skimage import transform
import numpy as np

from skimage import measure
import lungs_finder as lf
import cv2

# for lung detection: crop window (defaults cover the whole resized image)
left_edge = 0
right_edge = 256
top_edge = 0
bottom_edge = 256
margin = 12

row_size = 256
col_size = 256

# Path to csv-file. File should contain X-ray filenames as first column,
# mask filenames as second column.

out_folder_matched_img = os.path.join("/mnt", "MyAzureFileShare", "Data", "ChestXRay", "images_centered")
out_folder_mismatched_image = os.path.join("/mnt", "MyAzureFileShare", "Data", "ChestXRay",
                                           "images_centered_mismatched_by_both")
csv_path = os.path.join("/mnt", "MyAzureFileShare", "Data", "ChestXRay", "Data_Entry_2017.csv")
# Path to the folder with images. Images will be read from path + path_from_csv
img_path = os.path.join("/mnt", "MyAzureFileShare", "Data", "ChestXRay", "images")
mis_detected_csv_path = os.path.join("/mnt", "MyAzureFileShare", "Data", "ChestXRay", "mis_detected.csv")
df = pd.read_csv(csv_path)

# Size every image is resized to before segmentation and before saving
im_shape = (256, 256)

# Load model
model_name = './trained_model.hdf5.bak'
UNet = load_model(model_name)

# segmentation outputs above this value count as lung
threshold = 0.85
# list to save the mis detected images
image_misdetect_list = []


def finding_lungs_non_DL_approach_and_save(image, file_name):
    """Crop the lungs with lungs_finder and save the result.

    Args:
        image: image array as read by cv2.imread.
        file_name: name under which the output image is written.

    Returns:
        True when lungs_finder returned a region taller than half the
        image height and wider than half the image width; that region is
        resized to ``im_shape`` and written to ``out_folder_matched_img``.
        False otherwise; the whole image is then resized and written to
        ``out_folder_mismatched_image``.
    """
    img_height = image.shape[0]
    img_width = image.shape[1]
    # Get both lungs image. It uses HOG as main method,
    # but if HOG found nothing it uses HAAR or LBP.
    found_lungs = lf.get_lungs(image)

    lungs_found = (found_lungs is not None
                   and found_lungs.shape[0] > img_height / 2
                   and found_lungs.shape[1] > img_width / 2)
    if lungs_found:
        cv2.imwrite(os.path.join(out_folder_matched_img, file_name),
                    cv2.resize(found_lungs, im_shape))
        return True

    cv2.imwrite(os.path.join(out_folder_mismatched_image, file_name),
                cv2.resize(image, im_shape))
    return False


def save_misdetected_list():
    """Write the file names collected in image_misdetect_list to mis_detected_csv_path."""
    pd.DataFrame({'col': image_misdetect_list}).to_csv(
        mis_detected_csv_path, header=False, index=False)


for index, item in df.iterrows():
    file_name = item['Image Index']
    raw_img = cv2.imread(os.path.join(img_path, file_name))
    if raw_img is None:
        # cv2.imread returns None for a missing or unreadable file
        print("could not read image, skipping:", file_name)
        continue

    # first channel only, resized, histogram-equalised, then standardised
    img = img_as_float(raw_img)[:, :, 0]
    img = transform.resize(img, im_shape)
    img = exposure.equalize_hist(img)
    img -= img.mean()
    img_std = img.std()
    if img_std > 0:  # a constant image would otherwise divide by zero
        img /= img_std

    # add batch and channel axes
    X = np.expand_dims(img, axis=0)
    X = np.expand_dims(X, axis=-1)
    inp_shape = X[0].shape

    prediction = UNet.predict(X)[..., 0].reshape(inp_shape[:2])

    thresh_img = np.where(prediction > threshold, 1.0, 0.0)  # threshold the image

    labels = measure.label(thresh_img)  # connected components of the mask
    # keep only regions big enough to be a lung
    global_B_box = [prop.bbox for prop in measure.regionprops(labels)
                    if prop.bbox[2] - prop.bbox[0] > row_size / 4
                    and prop.bbox[3] - prop.bbox[1] > col_size / 6]

    if len(global_B_box) == 2:
        first_box, second_box = global_B_box
        # bbox layout is (min_row, min_col, max_row, max_col)
        left_edge = np.clip(min(first_box[1], second_box[1]) - margin, a_min=0, a_max=col_size)
        right_edge = np.clip(max(first_box[3], second_box[3]) + margin, a_min=0, a_max=col_size)
        top_edge = np.clip(min(first_box[0], second_box[0]) - margin, a_min=0, a_max=row_size)
        # leave more margins at the bottom
        # NOTE(review): the two boxes use different multipliers (3 vs 4) --
        # kept as in the original, confirm this asymmetry is intended.
        bottom_edge = np.clip(max(first_box[2] + margin * 3, second_box[2] + margin * 4),
                              a_min=0, a_max=row_size)

        cropped = cv2.resize(raw_img, im_shape)[top_edge:bottom_edge, left_edge:right_edge]
        resized_cropped = cv2.resize(cropped, im_shape)
        cv2.imwrite(os.path.join(out_folder_matched_img, file_name), resized_cropped)
    elif not finding_lungs_non_DL_approach_and_save(raw_img, file_name):
        # save file name only if both methods are not detected
        image_misdetect_list.append(file_name)
        print(file_name)

    if index > 112120:  # for debug purpose
        break

    if index % 100 == 0:
        # periodic checkpoint of the misdetected list
        save_misdetected_list()

save_misdetected_list()
# This script generates the scoring and schema files
# Creates the schema, and holds the init and run functions needed to
# operationalize the chestXray model

import os
import sys
import pickle
import base64

import keras.models
import keras.layers
import keras_contrib.applications.densenet
import pandas as pd
import numpy as np

import azure_chestxray_utils
import azure_chestxray_cam

####################################
# Parameters
####################################
# Scoring model shared by init() and run(); stays None until init() has
# loaded it. (A `global` statement at module level has no effect, so the
# name is simply bound here.)
chest_XRay_model = None
# name of the input column that carries the base64-encoded, pickled image
as_string_b64encoded_pickled_data_column_name = 'encoded_image'
# densenet_weights_file_name = 'weights_only_chestxray_model_14_weights_712split_epoch_029_val_loss_147.7599.hdf5'
# NOTE(review): the active file name ends in " - Copy.hdf5" -- confirm this
# is the intended weights file.
densenet_weights_file_name = 'weights_only_chestxray_model_14_weights_712split_epoch_029_val_loss_147.7599 - Copy.hdf5'

# Import data collection library. Only supported for docker mode.
# Functionality will be ignored when package isn't found
try:
    from azureml.datacollector import ModelDataCollector
except ImportError:
    print("Data collection is currently only supported in docker mode. May be disabled for local mode.")

    class ModelDataCollector(object):
        """No-op stand-in used when azureml.datacollector cannot be imported.

        Construction accepts any arguments, and every attribute that is not
        otherwise defined resolves to a callable that ignores its arguments
        and returns None.
        """

        def __init__(self, *args, **kw):
            pass

        def nop(*args, **kw):
            """Accept any arguments (including the bound instance) and do nothing."""
            pass

        def __getattr__(self, _):
            # Only reached for names that normal lookup did not find.
            return self.nop


####################################
# Utils
####################################
def as_string_b64encoded_pickled(input_object):
    """Pickle *input_object*, base64-encode the bytes, and return them as text.

    Inverse of ``unpickled_b64decoded_as_bytes``.
    """
    # b64encode returns bytes; decode so the payload can travel as a plain string.
    return base64.b64encode(pickle.dumps(input_object)).decode('utf-8')


def unpickled_b64decoded_as_bytes(input_object):
    """Recover the object serialized by ``as_string_b64encoded_pickled``.

    Args:
        input_object: the base64 payload as ``str``, as the textual repr of a
            bytes object (``"b'...'"``), or as raw ``bytes``/``bytearray``.

    Returns:
        The unpickled Python object.

    Raises:
        binascii.Error, pickle.UnpicklingError, and the other errors that
        ``base64.b64decode`` / ``pickle.loads`` raise on malformed payloads.
    """
    if isinstance(input_object, (bytes, bytearray)):
        # Already bytes: calling str.startswith-style checks would raise TypeError.
        encoded = bytes(input_object)
    else:
        encoded = input_object
        if encoded.startswith("b'"):
            # The payload is the repr of a bytes object; peel off the b'...'
            # wrapper. Only drop the trailing quote when it is really there so
            # a truncated wrapper does not cost a data character.
            encoded = encoded[2:]
            if encoded.endswith("'"):
                encoded = encoded[:-1]
        # make string bytes
        encoded = encoded.encode('utf-8')
    # SECURITY: pickle.loads executes arbitrary code embedded in the payload.
    # Only feed this function data from a trusted producer.
    return pickle.loads(base64.b64decode(encoded))


def get_image_score_and_serialized_cam(crt_cv2_image, crt_chest_XRay_model):
    """Score one image and build its class-activation-map (CAM) picture.

    Args:
        crt_cv2_image: image array; it is passed through
            ``azure_chestxray_utils.normalize_nd_array`` and then rescaled by
            255 and cast to uint8 (presumably the helper maps values into
            [0, 1] -- confirm against azure_chestxray_utils).
        crt_chest_XRay_model: model object handed to
            ``azure_chestxray_cam.get_score_and_cam_picture``.

    Returns:
        ``(predictions, serialized_image)`` where ``serialized_image`` is
        whatever ``azure_chestxray_cam.plot_cam_results`` returns (callers in
        this file consume it through ``.read()``).
    """
    prj_consts = azure_chestxray_utils.chestxray_consts()
    crt_cv2_image = azure_chestxray_utils.normalize_nd_array(crt_cv2_image)
    crt_cv2_image = 255 * crt_cv2_image
    crt_cv2_image = crt_cv2_image.astype('uint8')
    predictions, cam_image, predicted_disease_index = \
        azure_chestxray_cam.get_score_and_cam_picture(crt_cv2_image, crt_chest_XRay_model)
    blended_image = azure_chestxray_cam.process_cam_image(cam_image, crt_cv2_image)
    # The plot is labelled with the name of the disease picked by the CAM helper.
    serialized_image = azure_chestxray_cam.plot_cam_results(
        blended_image, cam_image, crt_cv2_image,
        prj_consts.DISEASE_list[predicted_disease_index])
    return predictions, serialized_image

####################################
# API functions
####################################

# Prepare the web service definition by authoring
# init() and run() functions. Test the functions
# before deploying the web service.
def _print_installed_distributions():
    """Best-effort dump of the installed packages, for deployment debugging."""
    try:
        import pip
        distributions = pip.get_installed_distributions()
    except (ImportError, AttributeError):
        # pip >= 10 no longer exposes get_installed_distributions(); fall back
        # to setuptools, and give up quietly if that is missing as well.
        try:
            import pkg_resources
            distributions = list(pkg_resources.working_set)
        except ImportError:
            print("Could not enumerate installed packages; continuing.")
            return
    for crt_dist in distributions:
        print(crt_dist)


def init():
    """Load the DenseNet model into the module-global ``chest_XRay_model``.

    Weights are read from ``densenet_weights_file_name`` relative to the
    current working directory. Any failure is printed rather than raised, so
    the function always returns None.
    """
    try:
        print("init() method: Python version: " + str(sys.version))
        print("crt Dir: " + os.getcwd())

        # Purely diagnostic: a failure here must never prevent the model from
        # loading (previously an AttributeError from pip aborted init()).
        _print_installed_distributions()

        # load the model file
        global chest_XRay_model
        chest_XRay_model = azure_chestxray_utils.build_DenseNetImageNet201_model()
        chest_XRay_model.load_weights(densenet_weights_file_name)
        print('Densenet model loaded')

    except Exception as e:
        print("Exception in init:")
        print(str(e))


def run(input_df):
    """Score the image carried in the first row of *input_df*.

    Args:
        input_df: table exposing ``.shape`` and a column named by
            ``as_string_b64encoded_pickled_data_column_name``; the value at
            label 0 of that column is the base64/pickle-encoded image.

    Returns:
        A JSON string with keys ``chestXrayScore`` (stringified predictions)
        and ``chestXrayCAM`` (base64/pickle-encoded CAM image). If anything
        raises, the exception message is returned as a plain string instead.
    """
    try:
        import json

        debugCounter = 0
        print("run() method: Python version: " + str(sys.version))
        print('Step ' + str(debugCounter))
        debugCounter += 1

        print('\ninput_df shape {}'.format(input_df.shape))
        print(list(input_df))
        print(input_df)

        encoded_image = input_df[as_string_b64encoded_pickled_data_column_name][0]
        print('Step ' + str(debugCounter))
        debugCounter += 1

        # SECURITY: the payload is unpickled, so callers must be trusted.
        input_cv2_image = unpickled_b64decoded_as_bytes(encoded_image)
        print('Step ' + str(debugCounter))
        debugCounter += 1

        # finally scoring
        predictions, serialized_cam_image = get_image_score_and_serialized_cam(
            input_cv2_image, chest_XRay_model)

        outDict = {"chestXrayScore": str(predictions),
                   "chestXrayCAM": as_string_b64encoded_pickled(serialized_cam_image)}
        return json.dumps(outDict)
    except Exception as e:
        # The service contract is "always return a string", so errors are
        # reported through the return value rather than raised.
        return str(e)


####################################
# main function can be used for test and demo
####################################
def main():
    """Local test/demo driver.

    Scores the first test image directly, scores it again after an
    encode/decode round trip, then exercises ``init()``/``run()`` from a
    scratch working directory and writes the service schema file.

    Raises:
        KeyError: if ``AZUREML_NATIVE_SHARE_DIRECTORY`` is not set.
        IOError: if the test image directory is empty or the first image
            cannot be read.
    """
    from azureml.api.schema.dataTypes import DataTypes
    from azureml.api.schema.sampleDefinition import SampleDefinition
    from azureml.api.realtime.services import generate_schema

    print('Entered main function:')
    print(os.getcwd())

    amlWBSharedDir = os.environ['AZUREML_NATIVE_SHARE_DIRECTORY']
    print(amlWBSharedDir)

    def get_files_in_dir(crt_dir):
        """Return the names of the regular files directly inside *crt_dir*."""
        return [f for f in os.listdir(crt_dir)
                if os.path.isfile(os.path.join(crt_dir, f))]

    fully_trained_weights_dir = os.path.join(
        amlWBSharedDir, 'chestxray', 'output', 'trained_models_weights')
    crt_models = get_files_in_dir(fully_trained_weights_dir)
    print(fully_trained_weights_dir)
    print(crt_models)

    test_images_dir = os.path.join(
        amlWBSharedDir, 'chestxray', 'data', 'ChestX-ray8', 'test_images')
    test_images = get_files_in_dir(test_images_dir)
    print(test_images_dir)
    print(len(test_images))
    if not test_images:
        raise IOError("No test images found in " + test_images_dir)

    # score in local mode (i.e. here in main function)
    model = azure_chestxray_utils.build_DenseNetImageNet201_model()
    model.load_weights(os.path.join(
        fully_trained_weights_dir, densenet_weights_file_name))

    print('Model weights loaded!')

    import cv2
    first_image_path = os.path.join(test_images_dir, test_images[0])
    cv2_image = cv2.imread(first_image_path)
    if cv2_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image " + first_image_path)
    x, serialized_cam_image = get_image_score_and_serialized_cam(cv2_image, model)
    file_bytes = np.asarray(bytearray(serialized_cam_image.read()), dtype=np.uint8)
    recovered_image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)

    print(test_images[0])
    print(x)
    print(recovered_image.shape)

    # score in local mode (i.e. here in main function) using encoded data
    encoded_image = as_string_b64encoded_pickled(cv2_image)
    df_for_api = pd.DataFrame(
        data=[[encoded_image]],
        columns=[as_string_b64encoded_pickled_data_column_name])
    # Drop the large intermediates before the second scoring pass.
    del encoded_image
    del cv2_image
    del serialized_cam_image

    input_df = df_for_api[as_string_b64encoded_pickled_data_column_name][0]
    input_cv2_image = unpickled_b64decoded_as_bytes(input_df)
    x, serialized_cam_image = get_image_score_and_serialized_cam(input_cv2_image, model)
    file_bytes = np.asarray(bytearray(serialized_cam_image.read()), dtype=np.uint8)
    recovered_image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)

    print('After encoding and decoding:')
    print(x)
    print(recovered_image.shape)

    del model

    # now create the post deployment env, i.e. score using init() and run()
    crt_dir = os.getcwd()
    working_dir = os.path.join(crt_dir, 'tmp_cam_deploy')
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    import shutil
    shutil.copyfile(
        os.path.join(fully_trained_weights_dir, densenet_weights_file_name),
        os.path.join(working_dir, densenet_weights_file_name))

    # init() loads the weights by bare file name, so run from the copy's folder.
    os.chdir(working_dir)

    # Turn on data collection debug mode to view output in stdout
    os.environ["AML_MODEL_DC_DEBUG"] = 'true'

    # Test the output of the functions
    init()
    print("Result: " + run(df_for_api))

    # Generate the schema
    data_for_schema = {"input_df": SampleDefinition(DataTypes.PANDAS, df_for_api)}
    schema_file = os.path.join(fully_trained_weights_dir, 'chest_XRay_cam_service_schema.json')
    generate_schema(run_func=run, inputs=data_for_schema, filepath=schema_file)
    print("Schema saved in " + schema_file)


if __name__ == "__main__":
    main()

# NOTE: in the original dump this physical line continues with the start of the
# next file of the diff; that text is kept verbatim below so nothing is lost.
# diff --git a/_posts/README.md b/_posts/README.md new file mode 100644 index 0000000..39bddb5 --- /dev/null +++ b/_posts/README.md @@ -0,0 +1,74 @@ +# Predict-Lung-Disease-through-Chest-X-Ray +We obtain this repository by refactoring the
[code](https://github.com/Azure/AzureChestXRay) for the blog post [Using Microsoft AI to Build a Lung-Disease Prediction Model using Chest X-Ray Images](https://blogs.technet.microsoft.com/machinelearning/2018/03/07/using-microsoft-ai-to-build-a-lung-disease-prediction-model-using-chest-x-ray-images/). These instructions aim to help newcomers get the system running quickly. +# Installation +1. Clone this repository + ```Shell + git clone https://github.com/fatLime/Predict-Lung-Disease.git + ``` + We'll refer to the cloned `Predict-Lung-Disease` directory as `ROOT`. + +2. Make sure all essential dependencies are available: pickle, random, re, tqdm, cv2, numpy, pandas, sklearn, keras, tensorflow, keras_contrib, collections.Counter. + +# Data setup +1. Download the NIH Chest X-ray Dataset from here: + https://nihcc.app.box.com/v/ChestXray-NIHCC. + You need all the image files (everything under the `images` folder of the NIH dataset), the `Data_Entry_2017.csv` file, and the bounding-box data `BBox_List_2017.csv`. + +2. Create the data directories + ```Shell + mkdir ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC + mkdir ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC_other + ``` +3. Save all images under `ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC` + +4. Save `Data_Entry_2017.csv` and `BBox_List_2017.csv` under `ROOT/azure-share/chestxray/data/ChestX-ray8/ChestXray-NIHCC_other` + +5. Process the data + ```Shell + mkdir ROOT/azure-share/chestxray/output/data_partitions + ``` + Run `000_preprocess.py` to create `*.pickle` files under this directory +# Test +1. We have provided the pretrained model `azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5` under `ROOT/azure-share/chestxray/output/fully_trained_models`. You can also download it separately from [here](https://chestxray.blob.core.windows.net/chestxraytutorial/tutorial_xray/chexray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5). + +2. 
Run `020_evaluate.py`; it will create `weights_only_azure_chest_xray_14_weights_712split_epoch_054_val_loss_191.2588.hdf5`, which stores only the weights of the pretrained model, under the same directory. + +3. The table below shows the AUC score for each of the 14 diseases (Delta = Stanford AUC score minus our AUC score, so a negative Delta means our model scores higher): + + | Disease | Our AUC Score | Stanford AUC Score | Delta + |--------------------|------------------|--------------------|-----------: + | Atelectasis | 0.822334 | 0.8094 | -0.012934 + | Cardiomegaly | 0.933610 | 0.9248 | -0.008810 + | Effusion | 0.882471 | 0.8638 | -0.018671 + | Infiltration | 0.744504 | 0.7345 | -0.010004 + | Mass | 0.858467 | 0.8676 | 0.009133 + | Nodule | 0.784230 | 0.7802 | -0.004030 + | Pneumonia | 0.800054 | 0.7680 | -0.032054 + | Pneumothorax | 0.829764 | 0.8887 | 0.058936 + | Consolidation | 0.811969 | 0.7901 | -0.021869 + | Edema | 0.894102 | 0.8878 | -0.006302 + | Emphysema | 0.847477 | 0.9371 | 0.089623 + | Fibrosis | 0.882602 | 0.8047 | -0.077902 + | Pleural Thickening | 1.000000 | 0.8062 | -0.193800 + | Hernia | 0.916610 | 0.9164 | -0.000210 + +# Visualization +1. Create the test image folder + ```Shell + mkdir ROOT/azure-share/chestxray/data/ChestX-ray8/test_images + ``` + Copy any number of images from `ChestXray-NIHCC` to `test_images` and resize them to 224x224 pixels. + +2. Run `004_cam_simple.py` and it will output a Class Activation Map (CAM). The CAM shows which regions of the image were relevant to the predicted class. + + ![Example Class Activation Map](https://github.com/fatLime/Predict-Lung-Disease/blob/master/image.png) + +# Referenced Papers +- Baseline result: https://arxiv.org/abs/1705.02315 +- Image Localization: http://arxiv.org/abs/1512.04150 +- The original CheXNet work, described on the [StanfordML website](https://stanfordmlgroup.github.io/projects/chexnet/) and in their [paper](https://arxiv.org/abs/1711.05225). 
+- http://cs231n.stanford.edu/reports/2017/pdfs/527.pdf for pre-processing the data +- https://arxiv.org/abs/1711.08760 for some other thoughts on the model architecture and the relationship between different diseases + +# Notes + Please contact yanhaotian@bupt.edu.cn if you have any problems or questions.