-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocess_images.py
223 lines (183 loc) · 6.43 KB
/
preprocess_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import imgaug as ia
import pandas as pd
import os
import matplotlib.pyplot as plt
import cv2 as cv
from collections import namedtuple
import glob
# Directory and file name of the annotation table.
# NOTE(review): neither is referenced in the visible part of this module;
# presumably the callers combine them to load the CSV - confirm.
PATH_TO_ANNO = '../annotations/'
ANNO_TABLE = 'train_labels.csv'
# Image directories. NOTE(review): not referenced in the visible code either;
# presumably passed by callers to resize_img_anno() - confirm.
PATH_TO_IMAGES = '../images/'
PATH_TO_RESIZED_IMAGES = '../images/resized/'
# Default directory used by get_images_bboxes() to read images from and by
# imwrite_images_to_path() to write images to.
PATH_TO_AUG_IMAGES = 'images/'
# Passed as figsize to plt.figure() in imshow_bbox().
IMAGE_SIZE = (12, 8)
def get_images_bboxes(df=None, path_to_images=PATH_TO_AUG_IMAGES, head=None):
    """
    Load every annotated image together with its bounding boxes and classes.

    The annotation table is grouped by the 'filename' column; each group
    describes one image. For every group the image is read from
    ``path_to_images`` and converted from OpenCV's BGR order to RGB.

    Parameters:
        df: annotation DataFrame with the columns 'filename', 'width',
            'height', 'class', 'xmin', 'ymin', 'xmax' and 'ymax'.
        path_to_images: directory the file names in ``df`` are relative to.
        head: accepted for backward compatibility; currently unused.

    Returns:
        A tuple ``(filenames, images, bboxes, classes)`` of four parallel
        lists: file name, RGB image array, ``ia.BoundingBoxesOnImage`` and
        the list of class values of the boxes on that image.

    Raises:
        ValueError: if ``df`` is not given.
        FileNotFoundError: if an image cannot be read by OpenCV.
    """
    if df is None:
        raise ValueError('df (the annotation table) is required')

    filenames = []
    images = []
    bboxes = []
    classes = []
    # each group holds all annotation rows that belong to one image file
    for filename, rows in df.groupby('filename'):
        # width/height are repeated on every row of the group; use the first
        H = int(rows['height'].iloc[0])
        W = int(rows['width'].iloc[0])

        # create the list of bbox objects of this image
        corners = zip(rows['xmin'], rows['ymin'], rows['xmax'], rows['ymax'])
        bboxes_on_an_image = [
            ia.BoundingBox(int(x1), int(y1), int(x2), int(y2))
            for x1, y1, x2, y2 in corners
        ]
        bboxes.append(ia.BoundingBoxesOnImage(
            bboxes_on_an_image, shape=(H, W)))
        classes.append(rows['class'].tolist())
        filenames.append(filename)

        # open the image; cv.imread signals failure by returning None
        path = os.path.join(path_to_images, filename)
        image = cv.imread(path)
        if image is None:
            raise FileNotFoundError('could not read image: ' + path)
        images.append(cv.cvtColor(image, cv.COLOR_BGR2RGB))
    return filenames, images, bboxes, classes
def imshow_bbox(images, bboxes, classes, head=None):
    """
    Draw the bounding boxes on each image and show it in its own figure.

    The box colour is picked from a fixed palette by class value
    (class ``c`` uses palette entry ``c - 1``). When ``head`` is truthy,
    plotting stops after ``head`` images.
    """
    palette = [
        [0, 255, 0],        # green
        [255, 140, 0],      # named YELLOW in the original palette
        [255, 255, 255],    # white
        [192, 192, 192],    # gray
        [0, 0, 255],        # blue
        [255, 0, 0],        # red
    ]
    triples = zip(images, bboxes, classes)
    for shown, (canvas, boxes_on_image, labels) in enumerate(triples, start=1):
        for k, box in enumerate(boxes_on_image.bounding_boxes):
            box_color = palette[labels[k] - 1]
            canvas = box.draw_on_image(canvas, thickness=20, color=box_color)
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(canvas)
        if head and shown == head:
            break
def resize_img_anno(path_to_images, output_path, df, W, H):
    """
    Scale the annotations in ``df`` and write resized copies of the images.

    ``df`` is always rescaled in place via scale_anno(). The images are only
    written when ``output_path`` does not already contain any '*.JPG' file;
    otherwise the function returns right after scaling the annotations.

    Parameters:
        path_to_images: directory holding the original images.
        output_path: directory the resized images are written to; each file
            is named 's_' + original name.
        df: annotation DataFrame; modified in place.
        W, H: target width and height in pixels.

    Raises:
        FileNotFoundError: if a source image cannot be read by OpenCV.
    """
    # collect the original names first: scale_anno() rewrites the column
    images = df['filename'].unique().tolist()
    scale_anno(df, W, H)

    # skip the (slow) resize when resized images already exist
    if glob.glob(os.path.join(output_path, '*.JPG')):
        return

    for img in images:
        source = os.path.join(path_to_images, img)
        temp = cv.imread(source)
        # cv.imread signals failure by returning None instead of raising
        if temp is None:
            raise FileNotFoundError('could not read image: ' + source)
        temp = cv.resize(temp, (W, H), interpolation=cv.INTER_AREA)
        # drop everything up to the first backslash (Windows-style prefix)
        new_name = os.path.join(
            output_path, 's_' + img[img.find('\\')+1:])
        cv.imwrite(new_name, temp)
def scale_anno(df, W, H, Wi=3648, Hi=2736):
    """
    Scale the annotation DF in place to a new image size.

    The x coordinates are multiplied by ``W / Wi`` and the y coordinates by
    ``H / Hi``; results are truncated to integers. The 'width' and 'height'
    columns are overwritten with ``W`` and ``H`` and every file name gets
    the prefix 's_'.

    Parameters:
        df: annotation DataFrame with the columns 'filename', 'xmin',
            'xmax', 'ymin' and 'ymax'; modified in place.
        W, H: target width and height in pixels.
        Wi, Hi: width and height of the original images in pixels.

    Returns:
        None; ``df`` is updated in place.
    """
    x_cols = ['xmin', 'xmax']
    y_cols = ['ymin', 'ymax']
    # the integer cast truncates toward zero, like int() on each value
    df[x_cols] = (df[x_cols] * (W / Wi)).astype('int64')
    df[y_cols] = (df[y_cols] * (H / Hi)).astype('int64')
    df['height'] = H
    df['width'] = W
    df['filename'] = 's_' + df['filename']
def create_anno(images, bboxes, classes, path_to_images, filenames=None, ssd=False):
    """
    Return the annotation DF for a set of images.

    The DF has one row per bounding box and the columns
    [filename, width, height, class, xmin, ymin, xmax, ymax, path].
    Boxes that lie fully outside their image are dropped together with
    their class; the remaining boxes are clipped to the image.

    Parameters:
        images: image arrays; ``shape[1]``/``shape[0]`` give width/height.
        bboxes: one bounding-boxes-on-image object per image.
        classes: one list of class values per image, parallel to the boxes.
        path_to_images: stored in the 'path' column; also searched for
            '*.JPG' files when ``filenames`` is not given.
        filenames: file name of each image; when empty or None the names
            are taken from the '*.JPG' files found in ``path_to_images``.
        ssd: make adjustments to fit the written images: width and height
            are set to 300, y coordinates are shifted down by 37 (the top
            padding added by imwrite_images_to_path) and the file names get
            the prefix 'aug_'.

    Returns:
        The annotation DataFrame.
    """
    columns = ['filename', 'width', 'height', 'class',
               'xmin', 'ymin', 'xmax', 'ymax', 'path']
    anno = {name: [] for name in columns}

    if filenames:
        filenames_o = filenames
    else:
        images_add = glob.glob(os.path.join(path_to_images, '*.JPG'))
        filenames_o = [os.path.basename(s) for s in images_add]

    for f, bb, img, cl in zip(filenames_o, bboxes, images, classes):
        for box, c in zip(bb.bounding_boxes, cl):
            # filter per box so that a dropped box also drops its class;
            # filtering the container first would shift the class labels
            if box.is_out_of_image(bb.shape):
                continue
            box = box.cut_out_of_image(bb.shape)
            anno['filename'].append(f)
            anno['xmin'].append(box.x1)
            anno['ymin'].append(box.y1)
            anno['xmax'].append(box.x2)
            anno['ymax'].append(box.y2)
            anno['width'].append(img.shape[1])
            anno['height'].append(img.shape[0])
            anno['path'].append(path_to_images)
            anno['class'].append(c)

    df = pd.DataFrame(anno)
    if ssd:
        df['width'] = 300
        df['height'] = 300
        # shift by the top padding; the cast truncates like int()
        y_cols = ['ymin', 'ymax']
        df[y_cols] = (df[y_cols] + 37).astype('int64')
        df['filename'] = 'aug_' + df['filename']
    return df[columns]
def imwrite_images_to_path(images, filenames,
                           output_path=PATH_TO_AUG_IMAGES, ssd=False):
    """
    Write the given images to ``output_path`` under the given file names.

    With ``ssd`` set, every image gets a constant border of 37 rows on top
    and 38 rows below, and the file names get the prefix 'aug_'. Without it
    the images are written unpadded under their original names. The channel
    order is swapped before writing.
    """
    pad_top, pad_bottom, name_prefix = (37, 38, 'aug_') if ssd else (0, 0, '')
    target_names = [name_prefix + name for name in filenames]

    for target_name, picture in zip(target_names, images):
        # no border is added on the left or right
        padded = cv.copyMakeBorder(picture, pad_top, pad_bottom,
                                   0, 0, cv.BORDER_CONSTANT)
        swapped = cv.cvtColor(padded, cv.COLOR_BGR2RGB)
        destination = os.path.join(output_path, target_name)
        cv.imwrite(destination, swapped)
def imwrite_aug_ssd(images, filenames, bboxes, classes,
                    output_images, pad_to_300=False):
    """
    Save augmented images and build their annotation DF.

    1. write the images to ``output_images`` (padded when ``pad_to_300``)
    2. create the annotation DF for the written images
    3. return the DF (this function itself does not write a csv file)
    """
    imwrite_images_to_path(
        images, filenames, output_path=output_images, ssd=pad_to_300)
    return create_anno(
        images, bboxes, classes,
        path_to_images=output_images,
        filenames=filenames,
        ssd=pad_to_300)