-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathprediction.py
222 lines (203 loc) · 7.53 KB
/
prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#coding:utf-8
import os
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
# Column names that must be present in the input CSV; each one is read
# from the data frame and handed to the models by name.
FEATURE_LIST = [
    'Number.of.comorbidities',
    'Lactate.dehydrogenase',
    'Age',
    'NLR',
    'Creatine.kinase',
    'Direct.bilirubin',
    'Malignancy',
    'X.ray.abnormality',
    'COPD',
    'Dyspnea',
]

# Directory passed to Model_NFold; it is expected to contain the
# per-fold sub-folders (fold_0, fold_1, ...).
MODEL_PATH = './models'
class Model(object):
    """Restore one saved TensorFlow graph and evaluate its output tensor.

    The graph definition is imported from ``<model_path>/<model_name>.meta``
    and the variables are restored from the checkpoint
    ``<model_path>/<model_name>`` into a newly created ``tf.Session``.

    Recognised ``config`` keys (all optional):
        model_name: checkpoint base name (default 'fold_0_fea_10_layer_3')
        input_vars: feature names, in the column order fed to the network
        scope: prefix under which the graph nodes were imported
        input_tensor_name, output_tensor_name, keepprob_tensor_name:
            explicit tensor names that override the scope-derived defaults
    """

    def __init__(self, model_path, config=None):
        """
        Arguments:
            model_path: directory holding the checkpoint files
            config: dict of options (see class docstring); None means defaults
        """
        # A None default avoids sharing one dict object between all calls.
        config = {} if config is None else config
        self.init_model(model_path, config)
        self.set_names(config)

    def init_model(self, model_path, config=None):
        """Import the meta graph into the default graph and restore weights.

        The resulting session is stored in ``self.sess``.
        """
        config = {} if config is None else config
        model_name = config.get('model_name', 'fold_0_fea_10_layer_3')
        checkpoint = os.path.join(model_path, model_name)
        saver = tf.train.import_meta_graph(checkpoint + '.meta')
        sess = tf.Session()
        # Variables are loaded from the checkpoint; no initializer op is run.
        saver.restore(sess, checkpoint)
        self.sess = sess

    def set_names(self, config=None):
        """Resolve the input, output and keep-prob tensors in the default graph.

        Sets ``input_vars``, the three ``*_tensor_name`` attributes and the
        tensor handles ``_input``, ``_output`` and ``_keepprob``.
        """
        config = {} if config is None else config
        self.input_vars = config.get(
            'input_vars',
            ['Malignancy',
             'X.ray.abnormality',
             'COPD',
             'Dyspnea',
             'Number.of.comorbidities',
             'Lactate.dehydrogenase',
             'Age',
             'NLR',
             'Creatine.kinase',
             'Direct.bilirubin'])

        # Tensor names are prefixed with "<scope>/" when a scope is given.
        scope = config.get('scope', None)
        scope_name = '%s/' % scope if scope is not None else ''

        self.input_tensor_name = config.get(
            'input_tensor_name', '%sX-Input:0' % scope_name)
        self.output_tensor_name = config.get(
            'output_tensor_name',
            '%shidden_layers/layer3/Tanh:0' % scope_name)
        # Configurable like the two names above; the default is the name
        # that was previously hard-coded.
        self.keepprob_tensor_name = config.get(
            'keepprob_tensor_name', '%sPlaceholder:0' % scope_name)

        graph = tf.get_default_graph()
        self._input = graph.get_tensor_by_name(self.input_tensor_name)
        self._output = graph.get_tensor_by_name(self.output_tensor_name)
        # Fed with 1.0 at prediction time; presumably the dropout
        # keep-probability placeholder (TODO confirm against the graph).
        self._keepprob = graph.get_tensor_by_name(self.keepprob_tensor_name)

    def predict(self, val_dict):
        """Run the network on the features found in ``val_dict``.

        Arguments:
            val_dict: mapping from feature name to either a scalar (one
                sample) or a 1-D array of values (one entry per sample);
                must contain every name in ``self.input_vars``
        Return:
            the value ``sess.run`` yields for the output tensor
        Raises:
            KeyError: if any required feature name is absent
        """
        # Explicit check instead of `assert`, which is removed under -O.
        missing = [name for name in self.input_vars if name not in val_dict]
        if missing:
            raise KeyError('missing input variables: %r' % (missing,))

        X_np = np.array([val_dict[name] for name in self.input_vars])
        if X_np.ndim == 1:
            # Scalars per feature: a single sample, shape (1, n_features).
            X_np = X_np.reshape((1, -1))
        else:
            # (n_features, n_samples) -> (n_samples, n_features).
            X_np = X_np.transpose()
        return self.sess.run(
            self._output,
            feed_dict={self._input: X_np, self._keepprob: 1.0})

    def close(self):
        """Close the underlying session and release its resources."""
        self.sess.close()
class Model_NFold(object):
    """Ensemble that averages the predictions of one ``Model`` per fold."""

    def __init__(self, model_path, config=None):
        """
        Arguments:
            model_path is the path containing the fold_0, fold_1, etc folders
            config: can leave it blank to use the default values
        """
        # Work on a private copy: the config is filled in with defaults
        # below, which must not leak into the caller's dict or into a
        # default object shared between instances.
        self.config = dict(config) if config is not None else {}
        self.fill_default_config()
        self.init_models(model_path)

    def fill_default_config(self):
        """Insert default 'model_name' and 'fold_list' entries when absent."""
        self.config.setdefault('model_name', 'model')
        self.config.setdefault('fold_list', ['fold_%d' % x for x in range(5)])

    def init_models(self, model_path):
        """Load one ``Model`` per fold into ``self.models``, keyed by scope.

        NOTE: this resets TensorFlow's process-wide default graph before
        importing the fold graphs.
        """
        tf.reset_default_graph()
        self.models = {}
        # normpath drops a trailing separator so that 'models/' still
        # yields 'models' rather than an empty base name.
        base = os.path.basename(os.path.normpath(model_path))
        for fold in self.config['fold_list']:
            fold_dir = os.path.join(model_path, '%s' % fold)
            scope_name = base + '_%s' % fold
            # Per-fold copy so the scope does not accumulate in self.config.
            fold_config = dict(self.config, scope=scope_name)
            # Import each fold's graph under its own scope so the node
            # names of different folds do not collide.
            with tf.variable_scope(scope_name):
                self.models[scope_name] = Model(fold_dir, fold_config)

    def predict(self, val_dict):
        """Return the mean of all fold models' predictions for ``val_dict``.

        Arguments:
            val_dict: mapping from feature name to value(s), forwarded
                unchanged to every fold model's ``predict``
        Raises:
            ValueError: if no fold model is loaded
        """
        if not self.models:
            raise ValueError('no fold models are loaded')
        total = None
        # No scope is entered here: predicting only runs tensors that
        # were already resolved when each model was loaded.
        for fold_model in self.models.values():
            fold_out = fold_model.predict(val_dict)
            total = fold_out if total is None else total + fold_out
        # Divide by the number of models actually loaded.
        return total / len(self.models)
class Model_COX_DL(object):
    """Linear score plus per-horizon probabilities from fixed coefficients.

    The score is the weighted sum of the input variables; for every entry
    of the cumulative base hazard the probability is computed as
    ``1 - exp(-exp(score) * hazard)``.
    """

    def __init__(self, config=None):
        """
        Arguments:
            config: can leave it blank to use the default values; see
                set_params for the recognised keys
        """
        self.set_params(config)

    def set_params(self, config=None):
        """Store the coefficients and the cumulative base hazard values.

        Arguments:
            config: optional dict with the keys
                'input_vars_and_coef': list of (variable name, coefficient)
                'cumulative_base_hazard': list of (label, hazard value)
        """
        # A None default avoids sharing one dict object between all calls.
        config = {} if config is None else config
        self.input_vars = config.get(
            'input_vars_and_coef',
            [('Malignancy', 1.06741622),
             ('X.ray.abnormality', 0.65624252),
             ('COPD', 0.28010291),
             ('Dyspnea', 0.34496069),
             ('Number.of.comorbidities', 0.06626169),
             ('Lactate.dehydrogenase', 0.04888803),
             ('Age', 0.09156370),
             ('NLR', 0.02040950),
             ('Creatine.kinase', 0.05368482),
             ('Direct.bilirubin', 0.03486548),
             ('DL.feature', 1.50393931)])
        self.cumulative_base_hazard = config.get(
            'cumulative_base_hazard',
            [('5days', 0.03552347),
             ('10days', 0.04164459),
             ('30days', 0.04840961)])

    def predict(self, val_dict):
        """Compute the score and the per-horizon probabilities.

        Arguments:
            val_dict: mapping from variable name to a scalar or array; must
                contain every name listed in ``self.input_vars``
        Return:
            dict with
                'score': the weighted sum of the inputs
                'prob': dict mapping each hazard label to
                    1 - exp(-exp(score) * hazard)
        """
        lpnew = None
        for var, coef in self.input_vars:
            term = val_dict[var] * coef
            if lpnew is None:
                # `term` is a fresh object, so accumulating into it below
                # never modifies the caller's input arrays.
                lpnew = term
            else:
                lpnew += term
        prob = {}
        for day, hazard in self.cumulative_base_hazard:
            prob[day] = 1 - np.exp(-np.exp(lpnew) * hazard)
        return {'prob': prob, 'score': lpnew}
def predict_batch_nfold(
        fname_input,
        fname_output,
        model_path=None,
        ):
    '''
    Predict survival probability
    Arguments:
        fname_input: string, input csv file name
            file should contain following columns (normalized value, NOT raw value):
                'patient_ID'
                'Number.of.comorbidities'
                'Lactate.dehydrogenase'
                'Age'
                'NLR'
                'Creatine.kinase'
                'Direct.bilirubin'
                'Malignancy'
                'X.ray.abnormality'
                'COPD'
                'Dyspnea'
            Only the columns named in FEATURE_LIST are validated;
            'patient_ID' is not checked here.
        fname_output: string, output csv file name
        model_path: optional directory containing the fold folders;
            defaults to MODEL_PATH when None
    Return:
        results will be saved in fname_output: the input columns plus
        'score_dl', 'score_final', 'survival_5days', 'survival_10days'
        and 'survival_30days' (written with pandas' default to_csv
        settings)
    Raises:
        KeyError: if a column listed in FEATURE_LIST is missing
    '''
    print('Predict survival rate')
    df = pd.read_csv(fname_input)

    # Explicit check instead of `assert`, which is removed under -O.
    missing = [var for var in FEATURE_LIST if var not in df]
    if missing:
        raise KeyError(
            'input file is missing required columns: %r' % (missing,))
    print('--date format check: pass')

    test_data = {var: df[var].values for var in FEATURE_LIST}

    # offline model:
    model = Model_NFold(MODEL_PATH if model_path is None else model_path)
    # final cox dl model:
    cox = Model_COX_DL()
    print('--load models: done')

    # compute the n-fold deep model score; flattened once so the same
    # 1-D vector is used both as a model input and as an output column
    dl = model.predict(test_data).flatten()
    # append dl result to the dictionary
    test_data['DL.feature'] = dl
    print('--predict deepsurv model: done')

    # compute final probabilities
    res = cox.predict(test_data)
    print('--predict final result: done')

    # save result; survival is the complement of the predicted probability
    df['score_dl'] = dl
    df['score_final'] = res['score']
    df['survival_5days'] = 1 - res['prob']['5days']
    df['survival_10days'] = 1 - res['prob']['10days']
    df['survival_30days'] = 1 - res['prob']['30days']
    df.to_csv(fname_output)
    print('--file saved to:', fname_output)
def print_help():
    """Print the command-line usage line to standard output."""
    usage = 'Usage: python prediction.py input_normalized.csv'
    print(usage)
if __name__ == '__main__':
    # The first command-line argument is the input CSV; the output is
    # written next to it with '_prediction.csv' appended to the name.
    cli_args = sys.argv[1:]
    if cli_args:
        input_csv = cli_args[0]
        predict_batch_nfold(input_csv, input_csv + '_prediction.csv')
    else:
        print_help()