body_part_detection.py
"""Used to find human poses in images.
"""
import os
from enum import IntEnum

import numpy as np

# set before importing TensorFlow to silence warnings and information messages
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf  # noqa: E402

# load the MoveNet model and its default inference signature
model = tf.saved_model.load("./movenet_model")
movenet = model.signatures["serving_default"]
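# Note: ./movenet_model is assumed to hold a MoveNet SavedModel. If it is
# missing, an equivalent model can typically be loaded from TF Hub instead
# (requires the tensorflow_hub package), e.g.:
#   import tensorflow_hub as hub
#   model = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")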


class BodyPart(IntEnum):
    """Access body parts returned by the detect_body_parts function.

    Example usage:
        body_parts = detect_body_parts(frame, 256)
        print(body_parts[BodyPart.RIGHT_SHOULDER])
    """

    NOSE = 0
    LEFT_EYE = 1
    RIGHT_EYE = 2
    LEFT_EAR = 3
    RIGHT_EAR = 4
    LEFT_SHOULDER = 5
    RIGHT_SHOULDER = 6
    LEFT_ELBOW = 7
    RIGHT_ELBOW = 8
    LEFT_WRIST = 9
    RIGHT_WRIST = 10
    LEFT_HIP = 11
    RIGHT_HIP = 12
    LEFT_KNEE = 13
    RIGHT_KNEE = 14
    LEFT_ANKLE = 15
    RIGHT_ANKLE = 16


def detect_body_parts(
    frame: np.ndarray, size: int = 256
) -> list[tuple[int, int, float]]:
    """Detect the positions and confidence of body parts in an image.

    :param frame: Image frame as a numpy array of shape (height, width, channels).
    :param size: Side length of the square input that MoveNet runs detection on.
    :return: 17 keypoints as (x, y, confidence) tuples, where
        x and y are integer pixel coordinates on the original frame and
        confidence is a float from 0 to 1 indicating the probability of detection.
    :rtype: list[tuple[int, int, float]]
    """
    # format the video frame into the square int32 tensor required by MoveNet
    image = tf.convert_to_tensor(frame)
    image = tf.image.resize_with_pad(image, size, size)
    image = tf.expand_dims(image, axis=0)
    image = tf.cast(image, dtype=tf.int32)
    # run inference; the output holds 17 keypoints as normalized (y, x, score)
    keypoints = movenet(image)["output_0"]
    keypoints = keypoints.numpy().squeeze()
    # map the normalized coordinates back onto the original frame:
    # resize_with_pad letterboxes the frame into a square whose side equals
    # max(height, width), so undo that scaling and the centering offset
    height, width, *_ = frame.shape
    box_size = max(height, width)
    return [
        (
            int(x * box_size - (box_size - width) / 2),
            int(y * box_size - (box_size - height) / 2),
            confidence,
        )
        for y, x, confidence in keypoints
    ]
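

# Minimal usage sketch: assumes OpenCV (cv2) is installed and that an image
# named "example.jpg" exists next to this script.
if __name__ == "__main__":
    import cv2

    # OpenCV loads images in BGR order; convert to RGB before passing to MoveNet
    frame = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
    body_parts = detect_body_parts(frame, 256)
    x, y, confidence = body_parts[BodyPart.RIGHT_SHOULDER]
    print(f"Right shoulder at ({x}, {y}) with confidence {confidence:.2f}")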