# generate_data.py
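"""Generate synthetic text-superimposed images together with bounding-box annotations.

Example invocation (a sketch using the argparse flags defined below; adjust the paths and
probability to your own data layout):

    python generate_data.py \
        --raw_image_path ./input_data/raw_images \
        --text_samples_path ./input_data/text/japanese.txt \
        --output_path output \
        --font_path fonts \
        --wordart_prob 0.1 \
        --save_annotation \
        --visualize
"""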
import argparse
import os
import random
import glob
import numpy as np
import pathlib
from pilmoji import Pilmoji
from input_data.twemoji_list import twemoji_list
from PIL import Image, ImageDraw, ImageFont, ImageStat
from wordart_gen import wordart_gen_func
import get_non_overlapped_rect
from img_add_texture import add_texture
# fix seed for consistency in generated data
random.seed(0)
def list_files_recursively(directory):
    """Yield every file under `directory`, recursing into subdirectories."""
    for item in directory.iterdir():
# print(type(item), item)
if item.is_file():
yield item
elif item.is_dir():
yield from list_files_recursively(item)
return
def start_generation(
image_path,
text_samples_path,
output_path,
font_list,
    wordart_probability,
visualize_annotation,
save_annotation_flag
):
# Load text samples
with open(text_samples_path, 'r', encoding='utf-8') as f:
text_samples = f.read().splitlines()
# Loop over images
    for i, img_path in enumerate(list_files_recursively(pathlib.Path(image_path))):
        # Load image
        img = Image.open(img_path).convert("RGBA")
img.thumbnail((1280, 1280))
img_width, img_height = img.size
aspect_ratio = img_width/img_height
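        # keep only landscape images up to ultra-wide, e.g. (illustrative numbers) a 1920x1080 image
        # (aspect 1.78) is kept, while 1080x1920 (0.56 < 1) and 3840x1080 (3.56 > 21/9 ≈ 2.33) are skipped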
if aspect_ratio < 1 or aspect_ratio > 21/9:
continue
min_dim = min(img_width, img_height)
# text outline thickness random with custom prob
min_max_thickness = [2, 20]
thick_range = np.arange(*min_max_thickness, 1, dtype=float)
        # probability distribution that strongly favors thinner outlines (reversed fourth-power weighting)
thick_prob = thick_range[::-1]**4
thick_prob /= np.sum(thick_prob)
thick_param = np.random.choice(thick_range, p=list(thick_prob))
        # adjust thickness according to the image dimensions (width, height)
outline_thickness = int(0.001 * min_dim * thick_param)
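        # e.g. (illustrative numbers): min_dim = 1000 and thick_param = 5.0 give
        # outline_thickness = int(0.001 * 1000 * 5.0) = 5 pixels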
# number of lines of text to add over image
num_lines = np.random.choice([1, 2, 3, 4, 5, 10,], p=[
0.05, 0.25, 0.3, 0.2, 0.15, 0.05], size=(1))[0]
overlapp_ck_annotations = []
annotations = []
annotations_yolo = []
# add texture to image with 10% probability
if random.random() < 0.1:
r, g, b, a = img.split()
rgb_image = Image.merge('RGB', (r, g, b))
augmented_img = add_texture(rgb_image, None)
r, g, b = augmented_img.split()
img = Image.merge('RGBA', (r, g, b, a))
        # Instead of plain text, superimpose Microsoft-WordArt-style text with probability wordart_probability
        text_is_wordart = random.random() < wordart_probability
        if not text_is_wordart:
for _ in range(num_lines):
# get text of each randomly picked line to be superimposed on image
text = random.choice(text_samples)
print("text: ", text)
# Generate random font parameters
font_size = int(0.01 * min_dim * (1+np.random.normal(10, 4)))
thickness = int(0.01 * min_dim * np.random.normal(30, 4))*font_size
font_path = random.choice(font_list)
print(font_path)
m = int(0.02 * min_dim) # margin
font = ImageFont.truetype(font_path, size=font_size)
                try:
                    text_bbox = font.getmask(text).getbbox()
                    original_text_size = text_bbox[2] - \
                        text_bbox[0], text_bbox[3] - text_bbox[1]
                except Exception:
                    # some fonts are unsupported or produce no bounding box for this text
                    continue
# set text direction ttb : top to bottom | rtl: right to left
if random.random() < 0.3:
text_direction = 'ttb'
text_size = original_text_size[1], original_text_size[1]*len(text)
else:
text_direction = 'rtl'
text_size = original_text_size
                # if the text is too large to fit inside the image, skip it and continue with another sample
if img_width - text_size[0] - 3*m < 0 or img_height - text_size[1] - 3*m < 0:
continue
                # Overlap check: we try placing the text at up to 10 random locations, each time checking for
                # overlap with already-placed texts; if no free spot is found, this text is skipped
#-------------------------------------------------------------------------------------------------------
#------------------------COORDINATE SELECTION WITH OVERLAP CHECKING BEGINS-------------------------------
#-------------------------------------------------------------------------------------------------------
loop_count = 0
while loop_count < 10:
loop_count += 1
                    # choose coordinates for the multi-text case, i.e. when more than one text is placed on the image
text_x = text_y = None
if len(overlapp_ck_annotations) > 0:
ax, ay, aw, ah, _ = overlapp_ck_annotations[-1]
# check if the x axis & y axis both are fine for right-bottom corner
if ax + aw + text_size[0] + 3*m < img_width and random.random() < 0.8:
if ay + ah + text_size[1] + 3*m < img_height:
if ax+aw-text_size[0] > 3:
if random.random() > 0.5:
text_x = ax
else:
text_x = ax+aw-text_size[0]
else:
text_x = ax
text_y = ay+ah+3*m
# check if the x axis & y axis both are fine for left-above corner
elif ax - text_size[0] - 3*m > 0:
if ay - text_size[1] - 3*m > 0:
text_x = ax-text_size[0]-3*m
text_y = ay + ah - text_size[1]
m = int(0.02 * min_dim) # margin
                    if text_x is None or text_y is None:
# for placing font text at corner
if font_size < int(0.01 * min_dim * (6)) and len(text) < 4:
rand = random.random()
if rand < 0.2:
text_x = random.randint(0, 2*m)
text_y = random.randint(0, 2*m)
elif rand < 0.4:
text_x = img_width - text_size[0] - 2*m
text_y = random.randint(m, 2*m)
else:
text_x = random.randint(m, img_width - text_size[0] - 2*m)
text_y = random.randint(m, img_height - text_size[1] - 2*m)
else:
text_x = random.randint(m, img_width - text_size[0] - 2*m)
text_y = random.randint(m, img_height - text_size[1] - 2*m)
                    # since several texts are superimposed, the chosen coordinates must not overlap;
                    # check the current text against every text already recorded in overlapp_ck_annotations
overlap = False
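                    # axis-aligned overlap test: two boxes are disjoint iff one lies entirely to the left,
                    # right, above, or below the other (with margin m); e.g. (illustrative) (0, 0, 10, 10)
                    # and (25, 0, 10, 10) are disjoint because 0 + 10 + m < 25 holds for any m < 15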
for annotation in overlapp_ck_annotations:
ax, ay, aw, ah, _ = annotation
if (text_x+text_size[0]+m < ax or ax+aw+m < text_x or text_y+text_size[1]+m < ay or ay+ah+m < text_y):
pass
else:
overlap = True
break
if not overlap:
break
# if overlap do not use that text
if overlap:
continue
#-------------------------------------------------------------------------------------------------------
#------------------------COORDINATE SELECTION WITH OVERLAP CHECKING ENDS-------------------------------
#-------------------------------------------------------------------------------------------------------
# Do we want colored text
colored = False
if random.random() < 0.5:
colored = True
# do we want text background (basically a rectangular background behind text)
rect_bk = False
if random.random() < 0.2:
rect_bk = True
                # make sure the text color is not too similar to the image region it is drawn on:
                # if text and background share the same color (e.g. black text on a black image) the text is invisible,
                # so we compute the contrast between the image region and the randomly selected text color
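                # e.g. (illustrative numbers): near-white text (L1 ≈ 255) over a dark region (L2 ≈ 20)
                # gives (255 + 0.05) / (20 + 0.05) ≈ 12.7, which passes the 6.5 threshold used below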
loop_count = 0
while loop_count < 20:
loop_count += 1
# Check the contrast ratio between the font color and the background color
# Crop image to the region where the text is going to be added
text_region = img.crop(
(text_x, text_y, text_x + text_size[0], text_y + text_size[1]))
# Compute the mean background color of the text region
bg_color = ImageStat.Stat(text_region).mean
                    # optionally make the text semi-transparent (only for larger fonts)
font_is_bigger = font_size > int(0.01 * min_dim * (14))
make_transparent_text = random.random() < 0.2 and font_is_bigger
if make_transparent_text:
alpha=random.randint(100, 128)
transparent_text_img = Image.new("RGBA", img.size, (255, 255, 255, 0))
draw_transparent_text = ImageDraw.Draw(transparent_text_img)
else:
alpha=255
if colored:
font_color = (random.randint(0, 255), random.randint(
0, 255), random.randint(0, 255), alpha) # Random font color
outline_color = (random.randint(0, 255), random.randint(
0, 255), random.randint(0, 255), alpha) # Random outline color
else:
rgb_values = text_region.getdata()
num_pixels = len(rgb_values)
sum_rgb = [sum(x) for x in zip(*rgb_values)]
mean_pixel_value = tuple(
map(int, [x / num_pixels for x in sum_rgb]))
font_color = (0, 0, 0)
outline_color = (0, 0, 0)
outline_thickness = 0
                        if sum(mean_pixel_value[:3]) / 3 < 128:  # average only the RGB channels (image is RGBA)
font_color = (200, 200, 200, alpha)
else:
font_color = (0, 0, 0, alpha)
                    # approximate luminance (0-255 scale) of the font color, using Rec. 709 channel weights
                    L1 = 0.2126 * font_color[0] + 0.7152 * \
                        font_color[1] + 0.0722 * font_color[2]
                    # approximate luminance of the mean background color of the text region
                    L2 = 0.2126 * bg_color[0] + 0.7152 * \
                        bg_color[1] + 0.0722 * bg_color[2]
contrast_ratio = (max(L1, L2) + 0.05) / (min(L1, L2) + 0.05)
if contrast_ratio >= 6.5:
break
                    # if a rectangle background is drawn behind the text, its fill is the inverse of the
                    # font color, so a single pass of this loop already guarantees contrast
                    if rect_bk:
                        break
draw = ImageDraw.Draw(img)
                # get the rectangular coordinates of the drawn text
x1, y1, x2, y2 = draw.textbbox((text_x, text_y), text,
font=font, direction=text_direction)
                # pad the bounding box so it also covers the text outline
x1 -= 3+outline_thickness
y1 -= 3+outline_thickness
x2 += 3+outline_thickness
y2 += 3+outline_thickness
if x1 < 0 or y1 < 0 or x2 > img_width or y2 > img_height:
continue
                # draw the text, either semi-transparent or fully opaque
if make_transparent_text:
draw_transparent_text.text((text_x, text_y), text, font=font,
fill=outline_color, stroke_width=outline_thickness, direction=text_direction)
img = Image.alpha_composite(img, transparent_text_img)
else:
draw.text((text_x, text_y), text, font=font,
fill=outline_color, stroke_width=outline_thickness, direction=text_direction)
                # shadow / border of the text
if random.random() < 0.1:
draw.text((int(text_x*1.005), int(text_y*1.005)), text, font=font,
fill=(192, 192, 192), direction=text_direction)
                # rectangular background box drawn behind the text
if rect_bk:
rect_fill = (255-font_color[0], 255-font_color[1], 255-font_color[2])
radius = random.randint(-4, int(original_text_size[1]*0.8))
radius = max(0, radius)
radius = min(int(original_text_size[1]*0.5), radius)
rec_pad=min(0.02, m/2)
wp = x2-x1
try:
draw.rounded_rectangle(
(int(x1-wp*rec_pad), int(y1-wp*rec_pad), int(x2+wp*rec_pad), int(y2+wp*rec_pad)), fill=rect_fill, radius=radius)
                    except Exception:
                        # rounded_rectangle may fail for degenerate coordinates
                        pass
# for augmentation we add emojis nearby texts
add_emoji = random.random() < 0.3 and font_size < int(0.01 * min_dim * (7))
if add_emoji:
emoji = " ".join(random.sample(twemoji_list, random.randint(1,3)))
try:
with Pilmoji(img) as pilmoji:
pilmoji.text((text_x, text_y), text+emoji, font=font,
fill=font_color, direction=text_direction)
                    except Exception:
                        # fall back to plain text if emoji rendering fails
draw.text((text_x, text_y), text, font=font,
fill=font_color, direction=text_direction)
else:
draw.text((text_x, text_y), text, font=font,
fill=font_color, direction=text_direction)
xc = (x1+x2)/2
yc = (y1+y2)/2
w = int((x2-x1)*1.01)
h = int((y2-y1)*1.01)
                # add this text's annotation to the per-image lists
overlapp_ck_annotations.append((x1, y1, w, h, text))
annotations.append((x1, y1, w, h, text))
assert overlapp_ck_annotations == annotations
xcN, wN = format(xc/img_width, '.6f'), format(w/img_width, '.6f')
ycN, hN = format(yc/img_height, '.6f'), format(h/img_height, '.6f')
category = 0 if text_direction == "rtl" else 1
if len(text) == 1:
category = 3
annotations_yolo.append((category, xcN, ycN, wN, hN))
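                # the tuple appended above becomes one YOLO-format line "<class> <xc> <yc> <w> <h>" with
                # coordinates normalized to [0, 1], e.g. (illustrative) "0 0.412300 0.275000 0.180000 0.052000"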
# visualize superimposed text with the bounding box
if visualize_annotation and len(annotations):
img_for_visualize = img.copy()
draw_visualize = ImageDraw.Draw(img_for_visualize)
for annotation in annotations:
x1, y1, w, h, _ = annotation
draw_visualize.rectangle((x1, y1, x1+w, y1+h), outline=(255, 255, 255), width=2)
img_for_visualize.show()
input("Press Enter to continue...")
else:
# ----------------------------------- use WordArt to draw text on images ------------------------------------------------
text_sizes = []
text_images = []
raw_text_list = []
text_direction_list = []
for j in range(num_lines):
text = random.choice(text_samples)
try:
text_image, wordart_style = wordart_gen_func.generate_wordart_image(text)
                except Exception:
continue
                # the font below is only used to estimate a text size; the WordArt image is later resized to match it
font_size = int(0.01 * min_dim * (1+random.randint(12, 18)))
font_path = random.choice(font_list)
font = ImageFont.truetype(font_path, size=font_size)
try:
text_bbox = font.getmask(text).getbbox()
except OSError:
continue
original_text_size = text_bbox[2] - \
text_bbox[0], text_bbox[3]-text_bbox[1]
                # if the style is a "stack" variant, the WordArt text runs top to bottom
if "stack" in wordart_style:
text_direction = 'ttb'
text_size = original_text_size[1], original_text_size[1]*len(text)
else:
text_direction = 'rtl'
text_size = original_text_size
if text_size[0]*1.2 > img_width or text_size[1]*1.2 > img_height:
continue
text_sizes.append(text_size)
text_image.thumbnail(original_text_size)
text_images.append(text_image)
raw_text_list.append(text)
text_direction_list.append(text_direction)
if text_sizes:
field_size = ((40,40), (img_width-40, img_height-40))
non_overlapped_boxs = get_non_overlapped_rect.generate_box(field_size, text_sizes)
                if non_overlapped_boxs is None:
                    # could not fit the boxes inside the image without overlap
continue
for text_image, non_overlapped_box, raw_text, text_direction in zip(text_images, non_overlapped_boxs, raw_text_list, text_direction_list):
(x, y), (w, h) = non_overlapped_box
img.paste(text_image, (x, y), text_image)
xc = x + w/2
yc = y + h/2
w = int(w*1.01)
h = int(h*1.01)
xcN, wN = format(xc/img_width, '.6f'), format(w/img_width, '.6f')
ycN, hN = format(yc/img_height, '.6f'), format(h/img_height, '.6f')
annotations.append((x, y, w, h, raw_text))
category = 0 if text_direction == "rtl" else 1
if len(raw_text) == 1:
category = 3
annotations_yolo.append((category, xcN, ycN, wN, hN))
# visualize superimposed text with the bounding box
if visualize_annotation and len(non_overlapped_boxs):
img_for_visualize = img.copy()
draw_visualize = ImageDraw.Draw(img_for_visualize)
for text_image, non_overlapped_box in zip(text_images, non_overlapped_boxs):
(x, y), (w, h) = non_overlapped_box
draw_visualize.rectangle((x, y, x+text_image.size[0], y+text_image.size[1]), outline=(255, 255, 255), width=2)
img_for_visualize.show()
input("Press Enter to continue...")
# if no annotation/bounding-box proceed to next iteration
if len(annotations) == 0:
continue
print(annotations)
if len(annotations) > 0 and save_annotation_flag:
## Save image with superimposed text
annotated_image_name = os.path.join(
output_path, f'{i}'.zfill(4)+'.png')
img.save(annotated_image_name)
# continue
            ## Save the ground truth (text label & location) to text files
annotation_file_name = os.path.join(
output_path, f'{i}'.zfill(4)+'_custom_format'+'.txt')
annotation_file_name_yolo = os.path.join(
output_path, f'{i}'.zfill(4)+'.txt')
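            # the custom-format file written below holds one "x1,y1,w,h,text" line per instance in absolute
            # pixels, e.g. (illustrative) "120,48,340,96,こんにちは"; the plain .txt alongside it is YOLO format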
with open(annotation_file_name, 'w') as f:
for annotation in annotations[:-1]:
f.write(
f'{annotation[0]},{annotation[1]},{annotation[2]},{annotation[3]},{annotation[4]}\n')
annotation = annotations[-1]
f.write(
f'{annotation[0]},{annotation[1]},{annotation[2]},{annotation[3]},{annotation[4]}')
with open(annotation_file_name_yolo, 'w') as f:
for annotation in annotations_yolo[:-1]:
f.write(
f'{annotation[0]} {annotation[1]} {annotation[2]} {annotation[3]} {annotation[4]}\n')
annotation = annotations_yolo[-1]
f.write(
f'{annotation[0]} {annotation[1]} {annotation[2]} {annotation[3]} {annotation[4]}')
def parse_arguments():
parser = argparse.ArgumentParser(description='Process image and text data.')
## Input/Output Paths
parser.add_argument('--raw_image_path', type=str, default='./input_data/raw_images',
help='Path to the directory containing the input images')
parser.add_argument('--text_samples_path', type=str, default='./input_data/text/japanese.txt',
help='Path to the text samples file')
parser.add_argument('--texture_imgs_path', type=str, default='./input_data/texture_images',
help='Path to the textures to be placed on image')
parser.add_argument('--output_path', type=str, default='output',
help='Path to save output images and annotations')
## if you want to save the output data
parser.add_argument('--save_annotation', action='store_true', default=False,
help='save the annotations')
## Fonts to be used
parser.add_argument('--font_path', type=str, default='fonts',
help='font directory')
    ## probability of using WordArt-style text, for reference see -> https://www.makewordart.com/
    parser.add_argument('--wordart_prob', type=float, default=0,
                        help='probability of the text being WordArt')
## if you want to visualize the annotations
parser.add_argument('--visualize', action='store_true', default=False,
help='visualize the annotations')
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_arguments()
font_list = glob.glob(f'{args.font_path}/*.ttf')
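    # note: only .ttf files are collected here; .otf or .ttc fonts in the directory would be ignored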
start_generation(
args.raw_image_path,
args.text_samples_path,
args.output_path,
font_list,
args.wordart_prob,
args.visualize,
args.save_annotation
)
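
# Illustrative sketch (not executed; assumes the default 'output' directory and a generated
# sample named 0000): reading back one of the YOLO label files written above.
#
#   with open('output/0000.txt') as f:
#       for line in f:
#           cls, xc, yc, w, h = line.split()
#           print(int(cls), float(xc), float(yc), float(w), float(h))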