#!/usr/bin/env python3
from copy import deepcopy
import open3d as o3d
import cv2
import numpy as np
from matplotlib import cm
from more_itertools import locate
import glob
import os
import random
import math
import open3d.visualization.gui as gui
import open3d.visualization.rendering as rendering
from Classifier import Classifier
from PIL import Image
from TTS import TTS
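# Classifier and TTS are project-specific modules: Classifier is expected to return a class name for a cropped image, TTS to speak a given string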
# define the views used to visualize the different results
view = {
"class_name" : "ViewTrajectory",
"interval" : 29,
"is_loop" : False,
"trajectory" :
[
{
"boundingbox_max" : [ 3.0000000000000004, 3.0000000000000004, 3.0000000000000004 ],
"boundingbox_min" : [ -2.5373308564675199, -2.1335212159612991, -1.327641381237858 ],
"field_of_view" : 60.0,
"front" : [ -0.067906654377651754, -0.70142886552419004, -0.70949716905755311 ],
"lookat" : [ 0.23133457176624028, 0.43323939201935069, 0.83617930938107121 ],
"up" : [ -0.084219703952140443, 0.71263056388397028, -0.69646587919626657 ],
"zoom" : 0.69999999999999996
}
],
"version_major" : 1,
"version_minor" : 0
}
view_2 = {
"class_name" : "ViewTrajectory",
"interval" : 29,
"is_loop" : False,
"trajectory" :
[
{
"boundingbox_max" : [ 0.57766820479332304, 0.53542418579498685, 0.011344715613939613 ],
"boundingbox_min" : [ -0.64853301295736199, -0.58922634959095666, -0.33045609592173575 ],
"field_of_view" : 60.0,
"front" : [ 0.68255579678994682, -0.41451757792185218, -0.60190760242934471 ],
"lookat" : [ 0.0933088775718625, -0.12451645369686579, -0.23988061191282783 ],
"up" : [ -0.50625636487562431, 0.3258133621214645, -0.79846737321322425 ],
"zoom" : 0.69999999999999996
}
],
"version_major" : 1,
"version_minor" : 0
}
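# RANSAC-based plane segmentation: wraps Open3D's segment_plane and keeps the plane model and the inlier cloud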
class PlaneDetection():
def __init__(self, point_cloud):
self.point_cloud = point_cloud
def colorizeInliers(self, r,g,b):
self.inlier_cloud.paint_uniform_color([r,g,b]) # paints the plane
def segment(self, distance_threshold=0.04, ransac_n=3, num_iterations=100):
print('Starting plane detection')
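        # segment_plane fits a plane a*x + b*y + c*z + d = 0 with RANSAC and returns the plane model plus the indices of the inlier points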
plane_model, inlier_idxs = self.point_cloud.segment_plane(distance_threshold=distance_threshold,
ransac_n=ransac_n,
num_iterations=num_iterations)
[self.a, self.b, self.c, self.d] = plane_model
self.inlier_cloud = self.point_cloud.select_by_index(inlier_idxs)
outlier_cloud = self.point_cloud.select_by_index(inlier_idxs, invert=True)
return outlier_cloud
def __str__(self):
        text = 'Segmented plane from point cloud with ' + str(len(self.point_cloud.points)) + ' points, with ' + str(len(self.inlier_cloud.points)) + ' inliers. '
text += '\nPlane: ' + str(self.a) + ' x + ' + str(self.b) + ' y + ' + str(self.c) + ' z + ' + str(self.d) + ' = 0'
return text
def main():
# ------------------------------------------
# Initialization
# ------------------------------------------
print("Load a ply point cloud, print it, and render it")
# personal path to the Washington_RGB-D_Dataset containing: rgbd-scenes-v2/pc/.. and: rgbd-scenes-v2/imgs/..
personal_path = '/home/rafael/Desktop/Washington_RGB-D_Dataset'
# personal_path = '/home/igino/Desktop/SAVI_dataset/Washington_RGB-D_Dataset'
dataset_path = personal_path + '/rgbd-scenes-v2/pc'
point_cloud_filenames = glob.glob(dataset_path+'/*.ply')
point_cloud_filename = random.choice(point_cloud_filenames)
    # manually select a scenario (comment this line out to keep the random choice above)
    point_cloud_filename = dataset_path+'/01.ply'
    # find the corresponding rgb images
parts = point_cloud_filename.split('/')
part = parts[-1]
part = part.split('.')
part = part[0]
# take only the first image for the moment
image_path = personal_path + '/rgbd-scenes-v2/imgs/scene_'+str(part)+'/00000-color.png'
image = cv2.imread(image_path)
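    # the external PCL tool pcl_ply2pcd converts the .ply scene to a .pcd file, which is then loaded with Open3D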
os.system('pcl_ply2pcd ' +point_cloud_filename+ ' pcd_point_cloud.pcd')
point_cloud_original = o3d.io.read_point_cloud('pcd_point_cloud.pcd')
point_cloud = deepcopy(point_cloud_original)
point_cloud_2 = deepcopy(point_cloud_original)# second copy for visualization
# ------------------------------------------
# Execution
# ------------------------------------------
number_of_planes = 2
minimum_number_points = 25
colormap = cm.Pastel1(list(range(0,number_of_planes)))
planes = []
while True: # run consecutive plane detections
plane = PlaneDetection(point_cloud)
        point_cloud = plane.segment(distance_threshold=0.035, ransac_n=3, num_iterations=200) # the new point cloud is made of the outliers of this plane detection
print(plane)
# colorization using a colormap
idx_color = len(planes)
color = colormap[idx_color, 0:3]
plane.colorizeInliers(r=color[0], g=color[1], b=color[2])
planes.append(plane)
        if len(planes) >= number_of_planes: # stop detecting planes
            print('Detected planes >= ' + str(number_of_planes))
            break
        elif len(point_cloud.points) < minimum_number_points:
            print('Number of remaining points < ' + str(minimum_number_points))
            break
# select table plane
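    # heuristic: the table plane is the detected plane whose center is closest to the z axis (smallest |x| + |y|)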
table_plane = None
table_plane_mean_xy = 1000 # large number
for plane_idx, plane in enumerate(planes):
center = plane.inlier_cloud.get_center()
print('Cloud ' + str(plane_idx) + ' has center '+str(center))
mean_x = center[0]
mean_y = center[1]
mean_z = center[2]
mean_xy = abs(mean_x) + abs(mean_y)
if mean_xy < table_plane_mean_xy:
table_plane = plane
table_plane_mean_xy = mean_xy
# paint in red the table plane
table_plane.colorizeInliers(r=1,g=0,b=0)
# downsampling
table_plane_downsampled = table_plane.inlier_cloud.voxel_down_sample(voxel_size=0.01)
point_cloud_downsampled = point_cloud.voxel_down_sample(voxel_size=0.01)
    print('After downsampling the table point cloud has ' + str(len(table_plane_downsampled.points)) + ' points')
# Clustering
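    # cluster_dbscan assigns a cluster label to every point of the downsampled table plane; points labelled -1 are noise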
cluster_idxs = list(table_plane_downsampled.cluster_dbscan(eps=0.08, min_points=50, print_progress=True))
possible_values = list(set(cluster_idxs))
if -1 in cluster_idxs:
possible_values.remove(-1)
print(possible_values)
# search for the biggest cluster
largest_cluster_num_points = 0
largest_cluster_idx = None
for value in possible_values:
num_points = cluster_idxs.count(value)
if num_points > largest_cluster_num_points:
largest_cluster_idx = value
largest_cluster_num_points = num_points
# search for the nearest cluster to the z axis (mean xy closest to zero)
nearest_z_cluster_idx = None
table_cloud_mean_xy = 1000
for value in possible_values:
idxs = list(locate(cluster_idxs, lambda x: x == value))
center = table_plane_downsampled.select_by_index(idxs).get_center()
# select the cluster closest to the z axis --> min x and min y
mean_x = center[0]
mean_y = center[1]
mean_z = center[2]
mean_xy = abs(mean_x) + abs(mean_y)
if mean_xy < table_cloud_mean_xy:
nearest_z_cluster_idx = value
table_cloud_mean_xy = mean_xy
largest_idxs = list(locate(cluster_idxs, lambda x: x == largest_cluster_idx))
nearest_z_idxs= list(locate(cluster_idxs, lambda x: x == nearest_z_cluster_idx))
    # Choose the table: the table is the cluster nearest to the z axis only if it has a certain amount of points, otherwise it is the biggest cluster
num_points = len(table_plane_downsampled.select_by_index(nearest_z_idxs).points)
if num_points >= 5000:
cloud_table = table_plane_downsampled.select_by_index(nearest_z_idxs)
else:
cloud_table = table_plane_downsampled.select_by_index(largest_idxs)
cloud_table.paint_uniform_color([0,1,0]) # paint in green the table
# ------------------------------------------
# crop the table
# ------------------------------------------
center = cloud_table.get_center()
    # translate the point cloud so that the table center is at the origin
point_cloud.translate(-center)
# Calculate rotation angle between plane normal & z-axis
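    # angle between the plane normal n and the z axis: arccos(n . z / (|n| |z|))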
plane_normal = tuple([table_plane.a,table_plane.b,table_plane.c])
z_axis = (0,0,1)
rotation_angle = np.arccos(np.dot(plane_normal, z_axis) / (np.linalg.norm(plane_normal)* np.linalg.norm(z_axis)))
# Calculate rotation axis
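    # the rotation axis is the plane normal (a,b,c) crossed with the z axis, normalized: (b,-a,0)/|n|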
plane_normal_length = math.sqrt(table_plane.a**2 + table_plane.b**2 + table_plane.c**2)
u1 = table_plane.b / plane_normal_length
u2 = -table_plane.a / plane_normal_length
rotation_axis = (u1, u2, 0)
# Generate axis-angle representation
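    # the axis-angle vector is the unit rotation axis scaled by the rotation angle; optimization_factor is an empirical tweak to improve the alignment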
optimization_factor = 1.1
axis_angle = tuple([x * rotation_angle * optimization_factor for x in rotation_axis])
# Rotate point cloud
R = point_cloud.get_rotation_matrix_from_axis_angle(axis_angle)
point_cloud.rotate(R, center=(0,0,0))
# Create a list of entities to draw
entities = [point_cloud_downsampled]
entities.append(table_plane_downsampled)
entities.append(cloud_table)
for x in entities:
x = x.translate(-center)
x = x.rotate(R,center=(0,0,0))
# take a bbox
obb = cloud_table.get_oriented_bounding_box()
box_points=obb.get_box_points()
box_points = np.asarray(box_points)
entities.append(obb)
    # Extend the box in the z direction; since the axes are not perfectly aligned we need some extra calculation
    # search for the couples of vertices (the ones with the closest x and y)
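    # vertices are paired by smallest |dx| + |dy|, i.e. each top vertex with the bottom vertex (almost) directly below it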
couples = []
for i in range(np.shape(box_points)[0]):
xy = 0
min_difference = 100 #big number
for j in range(np.shape(box_points)[0]):
if i != j:
x_difference = abs(box_points[i,0]-box_points[j,0])
y_difference = abs(box_points[i,1]-box_points[j,1])
difference = x_difference+y_difference
if difference < min_difference:
min_difference = difference
couple = [i,j]
# save the couple
couple = sorted(couple)
if not couple in couples:
couples.append(couple)
couple = []
z = sorted(box_points[:,2])
    # the z axis points from the table towards the floor
    high_z = z[4] # the highest of the four bottom vertices
    low_z = z[0] # the smallest value, i.e. the top face of the bbox
# Uniform the z values of the bbox
for i in range(len(couples)):
if box_points[couples[i][0],2] < box_points[couples[i][1],2]:
box_points[couples[i][0],2] = low_z
box_points[couples[i][1],2] = high_z
else:
box_points[couples[i][1],2] = low_z
box_points[couples[i][0],2] = high_z
        # for each couple, extend one vertex in the z direction
if box_points[couples[i][0],2] < box_points[couples[i][1],2]:
box_points[couples[i][0],2] = box_points[couples[i][0],2] - 0.4
else:
box_points[couples[i][1],2] = box_points[couples[i][1],2] - 0.4
# From numpy to Open3D
box_points = o3d.utility.Vector3dVector(box_points)
bbox = o3d.geometry.OrientedBoundingBox.create_from_points(box_points)
bbox.color = (0, 1, 0)
entities.append(bbox)
# ------------------------------------------
# from the cropped table isolate the object
# ------------------------------------------
# take another copy of the original point cloud so it isn't downsampled
point_cloud_2.translate(-center)
point_cloud_2.rotate(R, center=(0,0,0))
table = point_cloud_2.crop(bbox)
    # another plane detection to better identify the table plane
    plane = PlaneDetection(table)
    table = plane.segment(distance_threshold=0.02) # returns the outlier cloud
print(plane)
color = [0,0,1]
plane.colorizeInliers(r=color[0], g=color[1], b=color[2])
# define entities to draw (second representation)
entities_2 = [table]
    # downsampling to make clustering faster
    table_plane_down = plane.inlier_cloud.voxel_down_sample(voxel_size=0.01)
entities_2.append(table_plane_down)
# clustering of the object
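    # a finer voxel size and a smaller eps than the table clustering, so the individual objects on the table separate into their own clusters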
table_downsampled = table.voxel_down_sample(voxel_size=0.005)
cluster_idxs = list(table_downsampled.cluster_dbscan(eps=0.025, min_points=100, print_progress=True))
object_idxs = list(set(cluster_idxs))
    if -1 in object_idxs: # -1 labels the noise points found by DBSCAN
        object_idxs.remove(-1)
number_of_objects = len(object_idxs)
objects = []
for object_idx in object_idxs:
object_point_idxs = list(locate(cluster_idxs, lambda x: x == object_idx))
object_points = table_downsampled.select_by_index(object_point_idxs)
# Create a dictionary to represent the objects
d = {}
d['idx'] = str(object_idx)
d['points'] = object_points
# compute bbox of the object
d['center'] = d['points'].get_center()
d['bbox'] = d['points'].get_axis_aligned_bounding_box()
d['bbox'].color=(0,1,0)
# compute mean color of the object
mean_color = [0,0,0]
for color in np.asarray(object_points.colors):
mean_color=mean_color+color
mean_color = mean_color/np.asarray(object_points.colors).shape[0]
d['mean_color']=mean_color
# compute other properties of objects:
max_bound = d['points'].get_max_bound()
min_bound = d['points'].get_min_bound()
        d['length'] = abs(max_bound[0] - min_bound[0])
        d['width'] = abs(max_bound[1] - min_bound[1])
        d['height'] = abs(max_bound[2] - min_bound[2])
d['volume'] = d['length']*d['width']*d['height']
d['distance'] = math.sqrt(d['center'][0]**2 + d['center'][1]**2)
        # select the objects that are above the table, near the center of the table, and have a reasonable size
if d['center'][2] <= -0.03 and d['volume'] <= 0.03 and d['distance'] <= 0.6 :
objects.append(d) # add the dict of this object to the list
# Draw objects
for object_idx, object in enumerate(objects):
entities_2.append(object['bbox'])
# ------------------------------------------
# Visualization
# ------------------------------------------
# draw the xyz axis
frame = o3d.geometry.TriangleMesh().create_coordinate_frame(size=3.0, origin = np.array([0.,0.,0.]))
entities.append(frame)
o3d.visualization.draw_geometries(entities,
zoom=view['trajectory'][0]['zoom'],
front=view['trajectory'][0]['front'],
lookat=view['trajectory'][0]['lookat'],
up=view['trajectory'][0]['up'],
point_show_normal=False)
o3d.visualization.draw_geometries(entities_2,
zoom=view_2['trajectory'][0]['zoom'],
front=view_2['trajectory'][0]['front'],
lookat=view_2['trajectory'][0]['lookat'],
up=view_2['trajectory'][0]['up'],
point_show_normal=False)
    # Make a more complex Open3D window to show object labels on top of the 3D scene
app = gui.Application.instance
app.initialize() # create an open3d app
w = app.create_window("Open3D - 3D Text", 1920, 1080)
widget3d = gui.SceneWidget()
widget3d.scene = rendering.Open3DScene(w.renderer)
widget3d.scene.set_background([0,0,0,1]) # set black background
    material = rendering.MaterialRecord() if hasattr(rendering, 'MaterialRecord') else rendering.Material() # newer Open3D versions renamed Material to MaterialRecord
material.shader = "defaultUnlit"
material.point_size = 2 * w.scaling
# define entities to draw in this representation
entities_3 = []
for object_idx, object in enumerate(objects):
entities_3.append(object['points'])
entities_3.append(object['bbox'])
for entity_idx, entity in enumerate(entities_3):
widget3d.scene.add_geometry("Entity " + str(entity_idx), entity, material)
# Draw labels over the objects
for object_idx, object in enumerate(objects):
label_pos = [object['center'][0], object['center'][1], object['center'][2] - object['height'] -0.1]
label_text = 'object idx: '+object['idx']+'\nheight: '+str(object['height']*100)+'\nwidth: '+str(object['width']*100)+'\nlength: '+str(object['length']*100)+'\nvolume: '+str(object['volume']*100*100*100)+'\ndistance from center table:'+str(object['distance']*100)+'\nmean color:'+str(object['mean_color'])
label = widget3d.add_3d_label(label_pos, label_text)
label.color = gui.Color(1,1,1)
bbox = widget3d.scene.bounding_box
widget3d.setup_camera(60.0, bbox, bbox.get_center())
w.add_child(widget3d)
app.run()
#--------------------------------------
# image processing
#--------------------------------------
# define camera parameters
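    # pinhole intrinsics assumed for the dataset camera: fx = fy = 570.3, principal point at (320, 240)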
intrinsic = np.array([[570.3, 0, 320],[ 0 , 570.3, 240],[0,0,1]])
rvec = np.eye(3)
tvec = np.float32([0,0,0]).reshape(1,3)
    distCoeff = np.zeros((1,4), dtype=float) # assume no lens distortion
    # apply the inverse transformation to return to the camera point of view
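    # rotating by R transposed and translating by +center undoes the earlier translate(-center)/rotate(R); note that rotate and translate modify the geometry in place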
for object_idx, object in enumerate(objects):
object['center_wrt_cam'] = object['points'].rotate(np.transpose(R),center=(0,0,0))
object['center_wrt_cam']= object['center_wrt_cam'].translate(center)
object['center_wrt_cam']= object['center_wrt_cam'].get_center()
[img_points,_] = cv2.projectPoints(object['center_wrt_cam'], rvec, tvec, intrinsic,distCoeffs=distCoeff)
object['img_point']= img_points[0][0]
object['x_pix'] = int(object['img_point'][0])
object['y_pix'] = int(object['img_point'][1])
    # validate the object view: in an image you may find overlapping objects
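    # two projected centers closer than 80 pixels are treated as overlapping; only the object closest to the camera (largest y_pix) is kept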
for object_idx, object in enumerate(objects):
for object_idx_2, object_2 in enumerate(objects):
if object['idx'] != object_2['idx'] and object['idx'] < object_2['idx']:
# if abs( abs(object['x_pix']) - abs(object_2['x_pix']) ) > 80 :
if math.sqrt( (abs(object['x_pix']) - abs(object_2['x_pix']))**2 + (abs(object['y_pix']) - abs(object_2['y_pix']))**2 ) > 80 :
object['valid'] = True
object_2['valid'] = True
                else: # take the object closest to the camera, i.e. the one with the highest y
if object['y_pix'] > object_2['y_pix']:
object['valid'] = True
object_2['valid'] = False
else:
object['valid'] = False
object_2['valid'] = True
    # keep only the objects that fall inside the camera image
    x_bound = image.shape[1] # image width (number of columns)
    y_bound = image.shape[0] # image height (number of rows)
for object_idx, object in enumerate(objects):
if object['x_pix'] > x_bound or object['y_pix'] > y_bound:
object['valid'] = False
    # cropped image size
    width = 80
    height = 70
    # crop each valid object from the image
for object_idx, object in enumerate(objects):
if object['valid'] == True:
top_l_x = object['x_pix'] - int(width/2)
top_l_y = object['y_pix'] - int(height/2)
bot_r_x = object['x_pix'] + int(width/2)
bott_r_y = object['y_pix'] + int(height/2)
object['crop'] = image[ top_l_y:bott_r_y , top_l_x:bot_r_x ]
# pass the image to the classifier
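            # Classifier is assumed to take a PIL image and return the predicted class name as a string; TTS is assumed to speak the given text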
im = Image.fromarray(object['crop'])
object['class_name'] = Classifier(im)
print(object['class_name'])
            text_to_speech = 'the object detected is a ' + object['class_name']
            TTS(text_to_speech)
# print and wait
cv2.imshow('win',object['crop'])
cv2.waitKey(0)
# draw on the image
img_2 = cv2.circle(image, (object['x_pix'],object['y_pix']), 3 ,[0,255,0], -1)
img_2 = cv2.rectangle(image, (top_l_x,top_l_y) ,(bot_r_x,bott_r_y), [0,255,0], 2)
img_2 = cv2.putText(image, object['class_name'], (top_l_x,top_l_y- 5),cv2.FONT_HERSHEY_SIMPLEX, 0.6, [0,255,0], 2)
# show the final image with the valid objects
cv2.imshow('win',image)
cv2.waitKey(0)
    #--------------------------------------------------
    # process all the other images
    #--------------------------------------------------
# 0.813492 -0.0473971 -0.515618 -0.264773 0.959567 -0.513907 0.9545 # for image 00887
# entities_4 = []
# entities_4.append(point_cloud_original)
# rotation = point_cloud_original.get_rotation_matrix_from_quaternion(np.array([0.813492,-0.0473971, -0.515618, -0.264773]))
# traslation = np.array([0.959567, -0.513907, 0.9545])
# frame = o3d.geometry.TriangleMesh().create_coordinate_frame(size=2, origin = traslation)
# entities_4.append(frame)
# center_points=np.array([0,0,0])
# for object_idx, object in enumerate(objects):
# rotation = object['points'].get_rotation_matrix_from_quaternion(np.array([0.813492,-0.0473971, -0.515618, -0.264773]))
# traslation = np.array([0.959567, -0.513907, 0.9545])
# object['center_wrt_cam']= object['points'].rotate(rotation,center=[0,0,0])
# object['center_wrt_cam']= object['center_wrt_cam'].translate(traslation)
# object['center_wrt_cam']= object['center_wrt_cam'].get_center()
# center_points=np.vstack([center_points,object['center_wrt_cam']])
# frame = o3d.geometry.TriangleMesh().create_coordinate_frame(size=0.5, origin = np.array(object['center_wrt_cam']))
# entities_4.append(frame)
# o3d.visualization.draw_geometries(entities_4,
# zoom=view['trajectory'][0]['zoom'],
# front=view['trajectory'][0]['front'],
# lookat=view['trajectory'][0]['lookat'],
# up=view['trajectory'][0]['up'],
# point_show_normal=False)
# center_points = np.delete(center_points,0,0)
# [img_points,_] = cv2.projectPoints(center_points, rvec, tvec, intrinsic,distCoeffs=distCoeff)
# for i in range(np.size(img_points,0)) :
# x_pix = int(img_points[i][0][0])
# y_pix = int(img_points[i][0][1])
# img_2 = cv2.circle(image, (x_pix,y_pix), 3 ,[0,255,0], -1)
# top_l_x = x_pix - int(width/2)
# top_l_y = y_pix - int(height/2)
# bot_r_x = x_pix + int(width/2)
# bott_r_y = y_pix + int(height/2)
# img_2 = cv2.rectangle(image, (top_l_x,top_l_y) ,(bot_r_x,bott_r_y), [0,255,0], 2)
if __name__ == "__main__":
main()