-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathtest_model.py
188 lines (159 loc) · 7.52 KB
/
test_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import caffe
import numpy as np
import cv2
import sg_utils as utils
import cap_eval_utils
from IPython.core.debugger import Tracer
# import caffe
def load_model(prototxt_file, model_file, base_image_size, mean, vocab):
    """
    Build the model dictionary used by the other functions in this module.

    The returned dict holds:
      'net'             -- a caffe.Net built from prototxt_file and model_file
                           in caffe.TEST phase
      'base_image_size' -- base_image_size, stored as given
      'means'           -- mean, stored as given
      'vocab'           -- vocab, stored as given
    """
    return {
        'net': caffe.Net(prototxt_file, model_file, caffe.TEST),
        'base_image_size': base_image_size,
        'means': mean,
        'vocab': vocab,
    }
def output_words_image(threshold_metric, output_metric, min_words, threshold, vocab, is_functional):
    """
    Select the words to report for a single image.

    Words are ranked by decreasing threshold_metric. A word is kept when
    either of the following holds:
      * its threshold_metric value is >= threshold, or
      * the running count of True entries of is_functional, taken in ranked
        order up to and including this word, is still below 1 + min_words.

    Parameters
    ----------
    threshold_metric : 1-D array-like with one value per vocabulary word;
        used for ranking and thresholding.
    output_metric : 1-D array-like with one value per vocabulary word;
        reported alongside each selected word.
    min_words : int, see the second rule above.
    threshold : scalar cut-off applied to threshold_metric.
    vocab : mapping whose 'words' entry is indexable by word position.
    is_functional : 1-D boolean array-like with one flag per word.
        NOTE: despite the name, output_words in this file passes True for
        words that are NOT in its functional-word list, so the running count
        effectively counts non-functional words.

    Returns
    -------
    list of (word, output_metric value, threshold_metric value) tuples in
    ranked order.
    """
    # Accept plain Python sequences as well as numpy arrays; fancy indexing
    # below requires arrays.
    threshold_metric = np.asarray(threshold_metric)
    output_metric = np.asarray(output_metric)
    is_functional = np.asarray(is_functional)

    # Positions of the words, best threshold_metric first.
    ranked = np.argsort(threshold_metric)[::-1]

    above_threshold = threshold_metric[ranked] >= threshold
    within_quota = np.cumsum(is_functional[ranked]) < 1 + min_words
    selected = ranked[above_threshold | within_quota]

    words = vocab['words']
    return [(words[ind], output_metric[ind], threshold_metric[ind]) for ind in selected]
def output_words(imdb, detection_file, eval_file, vocab,
                 threshold_metric_name, output_metric_name, threshold, min_words,
                 output_file=None,
                 functional_words=('a', 'on', 'of', 'the', 'in', 'with', 'and', 'is',
                                   'to', 'an', 'two', 'at', 'next', 'are')):
    """
    Output the words as generated by the model.

    Loads the detections from detection_file and the score/precision arrays
    from eval_file (both via utils.load_variables), maps every detection
    score in dt['mil_prob'] to a precision value with
    cap_eval_utils.compute_precision_score_mapping, and stores the result as
    dt['prec'] so that 'prec' can be used as a metric name.

    For every image the words are selected by output_words_image, ranked and
    thresholded on dt[threshold_metric_name] and reported with
    dt[output_metric_name].

    Parameters
    ----------
    imdb : object providing num_images and image_index.
    detection_file, eval_file : inputs for utils.load_variables.
    vocab : mapping with a 'words' entry.
    threshold_metric_name, output_metric_name : keys into the loaded
        detections (or 'prec').
    threshold, min_words : forwarded to output_words_image.
    output_file : optional path; when given, one line per image is written as
        "<image index>: <word> (<metric>), <word> (<metric>), ...".
    functional_words : words excluded from the min_words count.

    Returns
    -------
    None. The only result is the optional output_file.
    """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)

    # True for words that are NOT functional words; output_words_image counts
    # these (its parameter is nevertheless called is_functional).
    functional = set(functional_words)
    is_content_word = np.array([word not in functional for word in vocab['words']])

    prec = np.zeros(dt['mil_prob'].shape)
    n_words = prec.shape[1]
    for jj in range(n_words):
        prec[:, jj] = cap_eval_utils.compute_precision_score_mapping(
            pt['details']['score'][:, jj].copy(),
            pt['details']['precision'][:, jj].copy(),
            dt['mil_prob'][:, jj])
        utils.tic_toc_print(1, 'compute precision score mapping: {:4d} / {:4d}'.format(jj, n_words))
    dt['prec'] = prec

    out_all = []
    for i in range(imdb.num_images):
        out = output_words_image(dt[threshold_metric_name][i, :], dt[output_metric_name][i, :],
                                 min_words, threshold, vocab, is_content_word)
        out_all.append(out)
        utils.tic_toc_print(1, 'output words image: {:4d} / {:4d}'.format(i, imdb.num_images))

    if output_file is not None:
        with open(output_file, 'wt') as f:
            for i, out in enumerate(out_all):
                f.write('{:d}: '.format(imdb.image_index[i]))
                # Each entry is (word, output metric, threshold metric); only
                # the first two are written.
                for entry in out:
                    f.write('{:s} ({:.2f}), '.format(entry[0], entry[1]))
                f.write('\n')
def test_model(imdb, model, detection_file=None):
    """
    Run the model on every image of imdb and store the detections on disk.

    Parameters
    ----------
    imdb : object providing num_images, image_index and image_path_at(i).
    model : dict with the entries 'net', 'base_image_size', 'means' and
        'vocab' (the latter with a 'words' entry).
    detection_file : optional target for utils.save_variables. When given,
        the variables 'sc', 'mil_prob', 'vocab' and 'imdb' are saved
        (overwriting an existing file). When None the scores are computed
        but not stored.

    Raises
    ------
    IOError if an image cannot be read by cv2.imread.
    """
    n_words = len(model['vocab']['words'])
    n_images = len(imdb.image_index)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    sc = np.zeros((imdb.num_images, n_words), dtype=np.float64)
    mil_prob = np.zeros((imdb.num_images, n_words), dtype=np.float64)

    for i in range(n_images):
        image_path = imdb.image_path_at(i)
        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside test_img.
            raise IOError('could not read image: {}'.format(image_path))
        sc[i, :], mil_prob[i, :] = test_img(im, model['net'], model['base_image_size'], model['means'])
        utils.tic_toc_print(60, 'test_img : {:6d}/{:6d}'.format(i, n_images))

    if detection_file is not None:
        utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
                             ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite=True)
def _summarize_pos(pos, ind, ap, prec_at_human_rec, human_prec):
    """Print and return the mean statistics (in percent) over the word indices ind."""
    summary = {'pos': pos,
               'ap': 100 * np.mean(ap[0, ind]),
               'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
               'human_prec': 100 * np.mean(human_prec[0, ind])}
    # Single parenthesised argument: valid both as a Python 2 print statement
    # and as a Python 3 print call.
    print(" {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
        pos, len(ind), summary['ap'], summary['prec_at_human_rec'], summary['human_prec']))
    return summary


def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file=None):
    """
    Benchmark the detections stored in detection_file against gt_label.

    For every word i the precision/recall curve, scores and average precision
    come from cap_eval_utils.calc_pr_ovr, and the human precision/recall from
    cap_eval_utils.human_agreement. From these the function derives
      * prec_at_human_rec: precision at the first curve point whose recall
        reaches the human recall (NaN when no point does),
      * rec_at_human_prec: recall at the last curve point whose precision
        reaches the human precision (0 when no point does),
      * rec_at_half_prec: recall at the last curve point whose precision is
        at least 0.5 (0 when no point does).

    Mean statistics are printed per part-of-speech tag in vocab['poss'] and
    for all words together.

    Parameters
    ----------
    imdb : stored unchanged in eval_file.
    vocab : mapping with 'words' and 'poss' entries of equal length.
    gt_label : 2-D array indexed as gt_label[:, i] for word i.
    num_references : forwarded to the cap_eval_utils functions.
    detection_file : input for utils.load_variables; must contain 'mil_prob'.
    eval_file : optional target for utils.save_variables; when given, the
        variables 'details', 'agg', 'vocab' and 'imdb' are saved.

    Returns
    -------
    dict with the keys 'precision', 'recall', 'ap', 'score',
    'prec_at_human_rec', 'rec_at_human_prec', 'human_prec', 'human_rec' and
    'rec_at_half_prec'.
    """
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    n_words = len(vocab['words'])
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    P = np.zeros(mil_prob.shape, dtype=np.float64)
    R = np.zeros(mil_prob.shape, dtype=np.float64)
    score = np.zeros(mil_prob.shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)
    human_prec = np.zeros((1, n_words), dtype=np.float64)
    human_rec = np.zeros((1, n_words), dtype=np.float64)
    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

    # Stays NaN for words whose recall never reaches the human recall; such a
    # word makes the corresponding np.mean below NaN as well.
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # np.where returns the matching positions in ascending order, so
        # ind[0] / ind[-1] are the first / last matching curve points.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
               'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
               'human_prec': human_prec, 'human_rec': human_rec,
               'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS tags, then over the whole vocabulary.
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        agg.append(_summarize_pos(pos, ind, ap, prec_at_human_rec, human_prec))
    agg.append(_summarize_pos('all', list(range(n_words)), ap, prec_at_human_rec, human_prec))

    if eval_file is not None:
        utils.save_variables(eval_file, [details, agg, vocab, imdb],
                             ['details', 'agg', 'vocab', 'imdb'], overwrite=True)

    return details
def test_img(im, net, base_image_size, means):
    """
    Run one image through the Caffe net and return its word scores.

    The image is mean-subtracted, passed through upsample_image with
    base_image_size, rearranged and fed to net.forward as the 'data' input.

    Returns a tuple (sc, mil_prob) of arrays with shape (1, n), copied from
    the 'mil_max' and 'mil' blobs of the net respectively.
    """
    # Subtract the means on a float32 copy, leaving the caller's array as is.
    centered = im.astype(np.float32, copy=True)
    centered -= means

    # Only the padded square image is needed; the other two return values of
    # upsample_image are ignored.
    padded, _, _ = upsample_image(centered, base_image_size)

    # Move the last axis to the front and prepend a singleton axis.
    batch = np.transpose(padded, axes=(2, 0, 1))[np.newaxis, :, :, :]

    net.forward(data=batch.astype(np.float32, copy=False))

    # Copy the blobs out of the net and flatten each to a single row.
    sc = net.blobs['mil_max'].data.copy()
    mil_prob = net.blobs['mil'].data.copy()
    return sc.reshape((1, sc.size)), mil_prob.reshape((1, mil_prob.size))
def upsample_image(im, sz):
    """
    Rescale im so that its longer side is scaled by sz / max(height, width)
    and place the result in the top-left corner of a zero-filled
    (sz, sz, 3) float64 array.

    Both axes use the same scale factor, so the aspect ratio is kept; images
    larger than sz are scaled down despite the function name.

    Returns
    -------
    (I_out, I, SZ) where I_out is the zero-padded (sz, sz, 3) array, I is the
    resized image returned by cv2.resize and SZ is I.shape.
    """
    h = im.shape[0]
    w = im.shape[1]
    # Builtin float replaces np.float, which was removed in NumPy 1.24 and
    # was only ever an alias for it.
    scale = float(sz) / float(max(h, w))
    I = cv2.resize(im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    I_out = np.zeros((sz, sz, 3), dtype=np.float64)
    I_out[0:I.shape[0], 0:I.shape[1], :] = I
    return I_out, I, I.shape