diff --git a/angel-docker-build.sh b/angel-docker-build.sh
index 43018d243..8cc62d380 100755
--- a/angel-docker-build.sh
+++ b/angel-docker-build.sh
@@ -4,6 +4,7 @@
#
set -e
SCRIPT_DIR="$(cd "$(dirname "${0}")" && pwd)"
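+# Enable BuildKit for the docker build run by this script.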
+export DOCKER_BUILDKIT=1
# source common functionalities
. "${SCRIPT_DIR}/scripts/common.bash"
diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py
index 1830d9049..e25763277 100644
--- a/angel_system/activity_classification/tcn_hpl/predict.py
+++ b/angel_system/activity_classification/tcn_hpl/predict.py
@@ -14,7 +14,7 @@
import numpy.typing as npt
import torch
-from tcn_hpl.data.components.augmentations import NormalizePixelPts
+from tcn_hpl.data.components.augmentations import NormalizePixelPts, NormalizeFromCenter
from tcn_hpl.models.ptg_module import PTGLitModule
from angel_system.activity_classification.utils import (
@@ -23,12 +23,15 @@
)
-def load_module(checkpoint_file, label_mapping_file, torch_device) -> PTGLitModule:
+def load_module(
+ checkpoint_file, label_mapping_file, torch_device, topic
+) -> PTGLitModule:
"""
:param checkpoint_file:
:param label_mapping_file:
:param torch_device:
+ :param topic:
:return:
"""
# # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
@@ -45,6 +48,7 @@ def load_module(checkpoint_file, label_mapping_file, torch_device) -> PTGLitModu
# HParam overrides
data_dir=mapping_file_dir,
mapping_file_name=mapping_file_name,
+ topic=topic,
)
# print(f"CLASSES IN MODEL: {model.classes}")
@@ -91,9 +95,12 @@ class PatientPose:
def normalize_detection_features(
det_feats: npt.ArrayLike,
feat_version: int,
+ top_k_objects: int,
img_width: int,
img_height: int,
num_det_classes: int,
+ normalize_pixel_pts: bool,
+ normalize_center_pts: bool,
) -> None:
"""
Normalize input object detection descriptor vectors, outputting new vectors
@@ -108,9 +115,16 @@ def normalize_detection_features(
:return: Normalized object detection features.
"""
- # This method is known to normalize in-place.
- # Shape [window_size, n_feats]
- NormalizePixelPts(img_width, img_height, num_det_classes, feat_version)(det_feats)
+ if normalize_pixel_pts:
+ # This method is known to normalize in-place.
+ # Shape [window_size, n_feats]
+ NormalizePixelPts(
+ img_width, img_height, num_det_classes, feat_version, top_k_objects
+ )(det_feats)
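+    # NormalizeFromCenter is likewise assumed to modify det_feats in-place.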
+ if normalize_center_pts:
+ NormalizeFromCenter(
+ img_width, img_height, num_det_classes, feat_version, top_k_objects
+ )(det_feats)
def objects_to_feats(
@@ -121,7 +135,9 @@ def objects_to_feats(
image_width: int,
image_height: int,
feature_memo: Optional[Dict[int, npt.NDArray]] = None,
- top_n_objects: int = 3,
+ top_k_objects: int = 1,
+ normalize_pixel_pts=False,
+ normalize_center_pts=False,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Convert some object detections for some window of frames into a feature
@@ -160,128 +176,64 @@ def objects_to_feats(
feature_dtype = None
# hands-joints offset vectors
- zero_offset = [0 for i in range(22)]
- joint_left_hand_offset_all_frames = [None] * window_size
- joint_right_hand_offset_all_frames = [None] * window_size
- joint_object_offset_all_frames = [None] * window_size
+ zero_joint_offset = [0 for i in range(22)]
+
# for pose in frame_patient_poses:
- for i, (pose, detection) in enumerate(
+ for i, (pose, detections) in enumerate(
zip(frame_patient_poses, frame_object_detections)
):
- if detection is None:
+ pose_keypoints = []
+ if detections is None:
continue
- labels = detection.labels
- bx, by, bw, bh = tlbr_to_xywh(
- detection.top,
- detection.left,
- detection.bottom,
- detection.right,
- )
- # iterate over all detections in that frame
- joint_object_offset = []
- for j, label in enumerate(labels):
- if label == "hand (right)" or label == "hand (left)":
- x, y, w, h = bx[j], by[j], bw[j], bh[j]
-
- cx, cy = x + (w // 2), y + (h // 2)
- hand_point = np.array((cx, cy))
-
- offset_vector = []
- if pose is not None:
- for joint in pose:
- jx, jy = joint.positions.x, joint.positions.y
- joint_point = np.array((jx, jy))
- dist = np.linalg.norm(joint_point - hand_point)
- offset_vector.append(dist)
- else:
- offset_vector = zero_offset
-
- if label == "hand (left)":
- joint_left_hand_offset_all_frames[i] = offset_vector
- elif label == "hand (right)":
- joint_right_hand_offset_all_frames[i] = offset_vector
- else:
- # if objects_joints and num_objects > 0:
- x, y, w, h = bx[j], by[j], bw[j], bh[j]
- cx, cy = x + (w // 2), y + (h // 2)
- object_point = np.array((cx, cy))
- offset_vector = []
- if pose is not None:
- for joint in pose:
- jx, jy = joint.positions.x, joint.positions.y
- joint_point = np.array((jx, jy))
- dist = np.linalg.norm(joint_point - object_point)
- offset_vector.append(dist)
- else:
- offset_vector = zero_offset
- joint_object_offset.append(offset_vector)
-
- joint_object_offset_all_frames[i] = joint_object_offset
-
- for i, frame_dets in enumerate(frame_object_detections):
- frame_dets: ObjectDetectionsLTRB
- if frame_dets is not None:
- f_id = frame_dets.id
- if f_id not in feat_memo:
- # the input message has tlbr, but obj_det2d_set_to_feature
- # requires xywh.
- xs, ys, ws, hs = tlbr_to_xywh(
- frame_dets.top,
- frame_dets.left,
- frame_dets.bottom,
- frame_dets.right,
- )
- feat = obj_det2d_set_to_feature(
- frame_dets.labels,
+ detection_id = detections.id
+ confidences = detections.confidences
+ if detection_id in feat_memo.keys():
+ # We've already processed this set
+ feat = feat_memo[detection_id]
+ else:
+ labels = detections.labels
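+            # The input message stores boxes as TLBR, but
+            # obj_det2d_set_to_feature expects XYWH.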
+ xs, ys, ws, hs = tlbr_to_xywh(
+ detections.top,
+ detections.left,
+ detections.bottom,
+ detections.right,
+ )
+
+ if pose is not None:
+ for joint in pose:
+ kwcoco_format_joint = {
+ "xy": [joint.positions.x, joint.positions.y],
+ "keypoint_category_id": -1, # TODO: not in message
+ "keypoint_category": joint.labels,
+ }
+ pose_keypoints.append(kwcoco_format_joint)
+
+ feat = (
+ obj_det2d_set_to_feature(
+ labels,
xs,
ys,
ws,
hs,
- frame_dets.confidences,
- None,
- None,
- None,
- None,
- None,
- label_to_ind=det_label_to_idx,
+ confidences,
+ pose_keypoints=(
+ pose_keypoints if pose_keypoints else zero_joint_offset
+ ),
+ obj_label_to_ind=det_label_to_idx,
version=feat_version,
- top_n_objects=top_n_objects,
+ top_k_objects=top_k_objects,
)
+ .ravel()
+ .astype(np.float32)
+ )
- offset_vector = []
-
- if joint_left_hand_offset_all_frames[i] is not None:
- offset_vector.extend(joint_left_hand_offset_all_frames[i])
- else:
- offset_vector.extend(zero_offset)
-
- if joint_right_hand_offset_all_frames[i] is not None:
- offset_vector.extend(joint_right_hand_offset_all_frames[i])
- else:
- offset_vector.extend(zero_offset)
-
- for j in range(top_n_objects):
- if joint_object_offset_all_frames[i] is not None:
- if len(joint_object_offset_all_frames[i]) > j:
- offset_vector.extend(joint_object_offset_all_frames[i][j])
- else:
- offset_vector.extend(zero_offset)
- else:
- offset_vector.extend(zero_offset)
-
- feat.extend(offset_vector)
- feat = np.array(feat, dtype=np.float64).ravel()
- feat_memo[f_id] = feat
-
- print(f"feat: {feat}")
- print(f"feat shape: {feat.shape}")
+ feat_memo[detection_id] = feat
- else:
- feat = feat_memo[f_id]
- feature_ndim = feat.shape
- feature_dtype = feat.dtype
- feature_list[i] = feat
+ feature_ndim = feat.shape
+ feature_dtype = feat.dtype
+ feature_list[i] = feat
# Already checked that we should have non-zero frames with detections above
# so feature_ndim/_dtype should not be None at this stage
assert feature_ndim is not None
@@ -306,9 +258,17 @@ def objects_to_feats(
# Normalize features
# Shape [window_size, n_feats]
- normalize_detection_features(
- feature_vec, feat_version, image_width, image_height, len(det_label_to_idx)
- )
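+    # Normalization is skipped entirely when neither mode is requested.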
+ if normalize_pixel_pts or normalize_center_pts:
+ normalize_detection_features(
+ feature_vec,
+ feat_version,
+ top_k_objects,
+ image_width,
+ image_height,
+ len(det_label_to_idx),
+ normalize_pixel_pts,
+ normalize_center_pts,
+ )
return feature_vec, mask
diff --git a/angel_system/activity_classification/train_activity_classifier.py b/angel_system/activity_classification/train_activity_classifier.py
index 288dd94c7..e60de931f 100644
--- a/angel_system/activity_classification/train_activity_classifier.py
+++ b/angel_system/activity_classification/train_activity_classifier.py
@@ -34,8 +34,8 @@ def data_loader(
- inv_act_map: Activity id to label string dict
- image_activity_gt: Image id to activity label string dict
- image_id_to_dataset: Image id to id in ``dset`` dict
- - label_to_ind: Object detection labels to ids dict
- - act_id_to_str: Object detection ids to labels dict
+ - obj_label_to_ind: Object detection labels to ids dict
+ - obj_ind_to_label: Object detection ids to labels dict
- ann_by_image: Image id to annotation dict
"""
print("Loading data....")
@@ -80,14 +80,11 @@ def data_loader(
min_cat = min([dset.cats[i]["id"] for i in dset.cats])
num_act = len(dset.cats)
- label_to_ind = {
+ obj_label_to_ind = {
dset.cats[i]["name"]: dset.cats[i]["id"] - min_cat for i in dset.cats
}
- print(
- f"Object label mapping:\n\t"
- f"{json.dumps([o['name'] for o in dset.categories().objs])}"
- )
- act_id_to_str = {dset.cats[i]["id"]: dset.cats[i]["name"] for i in dset.cats}
+    print(f"Object label mapping:\n\t{obj_label_to_ind}")
+ obj_ind_to_label = {dset.cats[i]["id"]: dset.cats[i]["name"] for i in dset.cats}
ann_by_image = {}
for gid, anns in dset.index.gid_to_aids.items():
@@ -101,8 +98,8 @@ def data_loader(
inv_act_map,
image_activity_gt,
image_id_to_dataset,
- label_to_ind,
- act_id_to_str,
+ obj_label_to_ind,
+ obj_ind_to_label,
ann_by_image,
)
@@ -111,24 +108,23 @@ def compute_feats(
act_map: dict,
image_activity_gt: dict,
image_id_to_dataset: dict,
- label_to_ind: dict,
- act_id_to_str: dict,
+ obj_label_to_ind: dict,
+ obj_ind_to_label: dict,
ann_by_image: dict,
feat_version=1,
- objects_joints: bool = False,
- hands_joints: bool = False,
- aug_trans_range=None,
- aug_rot_range=None,
- top_n_objects=3,
+ top_k_objects=1,
) -> Tuple[np.ndarray, np.ndarray]:
"""Compute features from object detections
:param act_map: Activity label string to id
:param image_activity_gt: Image id to activity label string dict
:param image_id_to_dataset: Image id to id in ``dset`` dict
- :param label_to_ind: Object detection labels to ids dict
- :param act_id_to_str: Object detection ids to labels dict
+ :param obj_label_to_ind: Object detection labels to ids dict
+ :param obj_ind_to_label: Object detection ids to labels dict
:param ann_by_image: Image id to annotation dict
+ :param feat_version:
+ Version of the feature conversion approach.
+    :param top_k_objects: Number of top-confidence objects to use per label, defaults to 1
:return: resulting feature data and its labels
"""
@@ -137,18 +133,7 @@ def compute_feats(
Y = []
dataset_id = []
last_dset = 0
-
- hands_possible_labels = ["hand (right)", "hand (left)", "hand", "hands"]
- non_objects_labels = ["patient", "user"]
- hands_inds = [
- key for key, label in act_id_to_str.items() if label in hands_possible_labels
- ]
- non_object_inds = [
- key for key, label in act_id_to_str.items() if label in non_objects_labels
- ]
- object_inds = list(
- set(list(label_to_ind.values())) - set(hands_inds) - set(non_object_inds)
- )
+ zero_joint_offset = [0 for i in range(22)]
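+    # Placeholder pose (22 joints) used when an image has no patient keypoints.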
for image_id in sorted(list(ann_by_image.keys())):
label_vec = []
@@ -157,173 +142,39 @@ def compute_feats(
ws = []
hs = []
label_confidences = []
- obj_obj_contact_state = []
- obj_obj_contact_conf = []
- obj_hand_contact_state = []
- obj_hand_contact_conf = []
+ pose_keypoints = []
- if objects_joints or hands_joints:
- joint_left_hand_offset = []
- joint_right_hand_offset = []
- joint_object_offset = []
+ # Reorganize detections into lists
+ if len(ann_by_image[image_id]) == 0:
+ continue
+ pose_keypoints = zero_joint_offset
+ for ann in ann_by_image[image_id]:
+ cat = obj_ind_to_label[ann["category_id"]]
- num_hands, num_objects = 0, 0
+ # Ignore the patient and user bboxes, use the pose from the patient
+ if cat in ["patient", "user"]:
+ if cat == "patient":
+ pose_keypoints = ann["keypoints"]
+ continue
- for ann in ann_by_image[image_id]:
- if "keypoints" in ann.keys():
- pose_keypoints = ann["keypoints"]
-
- elif "confidence" in ann.keys():
- label_vec.append(act_id_to_str[ann["category_id"]])
- x, y = ann["bbox"][0], ann["bbox"][1]
- w, h = ann["bbox"][2], ann["bbox"][3]
-
- if aug_trans_range != None and aug_rot_range != None:
-
- print(f"performing augmentation")
- random_translation_x = np.random.uniform(
- aug_trans_range[0], aug_trans_range[1]
- )
- random_translation_y = np.random.uniform(
- aug_trans_range[0], aug_trans_range[1]
- )
- random_rotation = np.random.uniform(
- aug_rot_range[0], aug_rot_range[1]
- )
-
- object_center_x, object_center_y = x + w // 2, y + h // 2
-
- rotation_matrix = np.array(
- [
- [
- np.cos(random_rotation),
- -np.sin(random_rotation),
- random_translation_x,
- ],
- [
- np.sin(random_rotation),
- np.cos(random_rotation),
- random_translation_y,
- ],
- [0, 0, 1],
- ]
- )
-
- xy = np.array([x, y, 1])
- xy_center = np.array([object_center_x, object_center_y, 1])
-
- rot_xy = (xy - xy_center) @ rotation_matrix.T + xy_center
-
- x = rot_xy[0]
- y = rot_xy[1]
-
- xs.append(x)
- ys.append(y)
- ws.append(w)
- hs.append(h)
- label_confidences.append(ann["confidence"])
-
- if ann["category_id"] in hands_inds:
- num_hands += 1
- elif ann["category_id"] in object_inds:
- num_objects += 1
- try:
- obj_obj_contact_state.append(ann["obj-obj_contact_state"])
- obj_obj_contact_conf.append(ann["obj-obj_contact_conf"])
- obj_hand_contact_state.append(ann["obj-hand_contact_state"])
- obj_hand_contact_conf.append(ann["obj-hand_contact_conf"])
- except KeyError:
- pass
-
- # hardcoded width?
- image_center = 1280 // 2
- if num_hands > 0:
- hands_loc_dict = {}
- for i, label in enumerate(label_vec):
- if label == "hand":
- hand_center = xs[i] + ws[i] // 2
- if hand_center < image_center:
- if "hand (left)" not in hands_loc_dict.keys():
- label_vec[i] = "hand (left)"
- hands_loc_dict[label_vec[i]] = (hand_center, i)
- else:
- if hand_center > hands_loc_dict["hand (left)"][0]:
- label_vec[i] = "hand (right)"
- hands_loc_dict[label_vec[i]] = (hand_center, i)
- else:
- prev_index = hands_loc_dict["hand (left)"][1]
- label_vec[prev_index] = "hand (right)"
- label_vec[i] = "hand (left)"
- else:
- if "hand (right)" not in hands_loc_dict.keys():
- label_vec[i] = "hand (right)"
- hands_loc_dict[label_vec[i]] = (hand_center, i)
- else:
- if hand_center < hands_loc_dict["hand (right)"][0]:
- label_vec[i] = "hand (left)"
- hands_loc_dict[label_vec[i]] = (hand_center, i)
- else:
- prev_index = hands_loc_dict["hand (right)"][1]
- label_vec[prev_index] = "hand (left)"
- label_vec[i] = "hand (right)"
-
- if "hand" in label_to_ind.keys():
- label_to_ind_tmp = {}
- for key, value in label_to_ind.items():
- if key == "hand":
- label_to_ind_tmp["hand (left)"] = value
- label_to_ind_tmp["hand (right)"] = value + 1
- elif key in non_objects_labels:
- continue
- else:
- label_to_ind_tmp[key] = value + 1
-
- label_to_ind = label_to_ind_tmp
-
- zero_offset = [0 for i in range(22)]
- if (num_hands > 0 or num_objects > 0) and (hands_joints or objects_joints):
- joint_object_offset = []
- for i, label in enumerate(label_vec):
-
- if hands_joints and num_hands > 0:
-
- if label == "hand (right)" or label == "hand (left)":
- bx, by, bw, bh = xs[i], ys[i], ws[i], hs[i]
- hcx, hcy = bx + (bw // 2), by + (bh // 2)
- hand_point = np.array((hcx, hcy))
-
- offset_vector = []
- if "pose_keypoints" in locals():
- for joint in pose_keypoints:
- jx, jy = joint["xy"]
- joint_point = np.array((jx, jy))
- dist = np.linalg.norm(joint_point - hand_point)
- offset_vector.append(dist)
- else:
- offset_vector = zero_offset
-
- if label == "hand (left)":
- joint_left_hand_offset = offset_vector
- elif label == "hand (right)":
- joint_right_hand_offset = offset_vector
-
- else:
- if objects_joints and num_objects > 0:
- bx, by, bw, bh = xs[i], ys[i], ws[i], hs[i]
- ocx, ocy = bx + (bw // 2), by + (bh // 2)
- object_point = np.array((ocx, ocy))
- offset_vector = []
- if "pose_keypoints" in locals():
- for joint in pose_keypoints:
- jx, jy = joint["xy"]
- joint_point = np.array((jx, jy))
- dist = np.linalg.norm(joint_point - object_point)
- offset_vector.append(dist)
- else:
- offset_vector = zero_offset
-
- joint_object_offset.append(offset_vector)
+ label_vec.append(cat)
+
+ x, y, w, h = ann["bbox"]
+ xs.append(x)
+ ys.append(y)
+ ws.append(w)
+ hs.append(h)
+
+ label_confidences.append(ann["confidence"])
+ # Ignore the patient and user labels in the feature vector
+ only_obj_label_to_ind = {
+ k: i
+ for i, (k, v) in enumerate(obj_label_to_ind.items())
+ if k not in ["patient", "user"]
+ }
+
+ # Compute feature vector
feature_vec = obj_det2d_set_to_feature(
label_vec,
xs,
@@ -331,42 +182,12 @@ def compute_feats(
ws,
hs,
label_confidences,
- None,
- obj_obj_contact_state,
- obj_obj_contact_conf,
- obj_hand_contact_state,
- obj_hand_contact_conf,
- label_to_ind,
+ pose_keypoints,
+ only_obj_label_to_ind,
version=feat_version,
- top_n_objects=top_n_objects,
+ top_k_objects=top_k_objects,
)
- if objects_joints or hands_joints:
- zero_offset = [0 for i in range(22)]
- offset_vector = []
- if hands_joints:
-
- if len(joint_left_hand_offset) >= 1:
- offset_vector.extend(joint_left_hand_offset)
- else:
- offset_vector.extend(zero_offset)
-
- if len(joint_right_hand_offset) >= 1:
- offset_vector.extend(joint_right_hand_offset)
- else:
- offset_vector.extend(zero_offset)
- if objects_joints:
-
- for i in range(top_n_objects):
- if len(joint_object_offset) > i:
- offset_vector.extend(joint_object_offset[i])
- else:
- offset_vector.extend(zero_offset)
-
- feature_vec.extend(offset_vector)
-
- feature_vec = np.array(feature_vec, dtype=np.float64)
-
X.append(feature_vec.ravel())
try:
@@ -477,19 +298,19 @@ def validate(
def save(
output_dir: Union[str, PosixPath],
act_str_list: List[str],
- label_to_ind: dict,
+ obj_label_to_ind: dict,
clf: RandomForestClassifier,
):
"""Save the model to a pickle file
:param output_dir: Path to save the model to
:param act_str_list: List of activity label strings
- :param label_to_ind: Object detection labels to ids dict
+ :param obj_label_to_ind: Object detection labels to ids dict
:param clf: model
"""
output_fn = f"{output_dir}/activity_weights.pkl"
with open(output_fn, "wb") as of:
- pickle.dump([label_to_ind, 1, clf, act_str_list], of)
+ pickle.dump([obj_label_to_ind, 1, clf, act_str_list], of)
print(f"Saved weights to {output_fn}")
@@ -509,16 +330,16 @@ def train_activity_classifier(args: argparse.Namespace):
inv_act_map,
image_activity_gt,
image_id_to_dataset,
- label_to_ind,
- act_id_to_str,
+ obj_label_to_ind,
+ obj_ind_to_label,
ann_by_image,
) = data_loader(args.train_fn, act_labels)
X, y = compute_feats(
act_map,
image_activity_gt,
image_id_to_dataset,
- label_to_ind,
- act_id_to_str,
+ obj_label_to_ind,
+ obj_ind_to_label,
ann_by_image,
)
plot_dataset_counts(X, y, args.output_dir, "train")
@@ -529,16 +350,16 @@ def train_activity_classifier(args: argparse.Namespace):
val_inv_act_map,
val_image_activity_gt,
val_image_id_to_dataset,
- val_label_to_ind,
- val_act_id_to_str,
+ val_obj_label_to_ind,
+ val_obj_ind_to_label,
val_ann_by_image,
) = data_loader(args.val_fn, act_labels)
X_final_test, y_final_test = compute_feats(
val_act_map,
val_image_activity_gt,
val_image_id_to_dataset,
- val_label_to_ind,
- val_act_id_to_str,
+ val_obj_label_to_ind,
+ val_obj_ind_to_label,
val_ann_by_image,
)
plot_dataset_counts(X_final_test, y_final_test, args.output_dir, "val")
@@ -549,7 +370,7 @@ def train_activity_classifier(args: argparse.Namespace):
# Save
act_str_list = [inv_act_map[key] for key in sorted(list(set(y)))]
- save(args.output_dir, act_str_list, label_to_ind, clf)
+ save(args.output_dir, act_str_list, obj_label_to_ind, clf)
def main():
diff --git a/angel_system/activity_classification/utils.py b/angel_system/activity_classification/utils.py
index d2ba9419d..eb845ba48 100644
--- a/angel_system/activity_classification/utils.py
+++ b/angel_system/activity_classification/utils.py
@@ -1,13 +1,31 @@
-from typing import Dict
-from typing import Tuple
+import os
+
+from typing import Dict, Tuple, List
import kwimage
+import random
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
-import matplotlib.patches as patches
+import matplotlib.colors as mcolors
+
from PIL import Image
+from pathlib import Path
+
+
+#########################
+# Default values
+#########################
+default_dist = (0, 0) # (1280 * 2, 720 * 2)
+default_center_dist = (0, 0) # (1280, 720)
+default_bbox = [0, 0, 0, 0] # [0, 0, 1280, 720]
+default_center = ([[0]], [[0]]) # kwimage.Boxes([default_bbox], "xywh").center
+default_center_list = [default_center[0][0][0], default_center[1][0][0]]
+zero_joint_offset = [0 for i in range(22)]
+
+random_colors = list(mcolors.CSS4_COLORS.keys())
+random.shuffle(random_colors)
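+# Shuffled CSS4 color names are used by plot_feature_vec for per-object plot colors.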
def tlbr_to_xywh(
@@ -26,6 +44,7 @@ def tlbr_to_xywh(
:param right: Array-like of right box coordinate values.
:return:
+ List of x values, List of y values, List of width values, List of height values
"""
assert (
len(top) == len(left) == len(bottom) == len(right)
@@ -37,246 +56,739 @@ def tlbr_to_xywh(
return xs, ys, ws, hs
-def obj_det2d_set_to_feature(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- descriptors,
- obj_obj_contact_state,
- obj_obj_contact_conf,
- obj_hand_contact_state,
- obj_hand_contact_conf,
- label_to_ind: Dict[str, int],
- version: int = 1,
- top_n_objects=3,
-):
- """Convert ObjectDetection2dSet fields into a feature vector.
+def feature_version_to_options(feature_version: int) -> Dict[str, bool]:
+ """Convert the feature version number to a dict of
+ boolean flags indicating which data values should be added to the feature vector
- :param label_to_ind:
- Dictionary mapping a label str and returns the index within the feature vector.
+ :param feature_version: Version of the feature conversion approach.
- :param version:
- Version of the feature conversion approach.
+ :return:
+ Dictionary of flag names and boolean values that match the input parameters
+ to the functions that create/utilize the feature vector
"""
- if version == 1:
- """
- Feature vector that encodes the activation feature of each class
-
- Len: 42
-
- [A[obj1] ... A[objN]]
- """
- feature_vec = obj_det2d_set_to_feature_by_method(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- label_to_ind,
- use_activation=True,
- )
+ options = {}
- elif version == 2:
- """
- Feature vector that encodes the distance of each object from each hand,
- and the activation features
+ """
+ Feature vector that encodes the activation feature of each class
- Len: 204
+ Len: top_k_objects * num_obj_classes
- [
+ [
+ for k_obj in top_k_object:
+ A[obj1] ... A[objN]
+ ]
+ """
+ options[1] = {"use_activation": True}
+
+ """
+ Feature vector that encodes the distance of each object from each hand,
+ and the activation features
+
+ Len:
+ top_k_objects * (
+ 1 + (num_obj_classes-2)*2 + 1 + (num_obj_classes-2)*2 + 2 + (num_obj_classes-2)
+ )
+
+ [
+ for k_obj in top_k_object:
A[right hand],
- D[right hand, obj1]x, D[right hand, obj1]y, ... , D[right hand, objN]y,
+ D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y,
A[left hand],
- D[left hand, obj1]x, D[left hand, obj1]y, ... , D[left hand, objN]y,
+ D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y,
D[right hand, left hand]x, D[right hand, left hand]y,
- A[obj1] ... A[objN]
- ]
- """
- feature_vec = obj_det2d_set_to_feature_by_method(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- label_to_ind,
- use_activation=True,
- use_hand_dist=True,
- )
+ A[obj1_k] ... A[objN_k]
+ ]
+ """
+ options[2] = {
+ "use_activation": True,
+ "use_hand_dist": True,
+ }
- elif version == 3:
- """
- Feature vector that encodes the distance of each object to the center of the frame,
- the intersection of each object to the hands,
- and the activation features
+ """
+ Feature vector that encodes the distance of each object to the center of the frame,
+ the intersection of each object to the hands,
+ and the activation features
- Len: 207
+ Len:
+ top_k_objects * (
+ 1 + 2 + 1 + 2 + 1 + (1 + 1 + 1 + 2) * (num_obj_classes-2)
+ )
- [
+ [
+ for k_obj in top_k_object:
A[right hand],
D[right hand, center]x, D[right hand, center]y,
A[left hand],
D[left hand, center]x, D[left hand, center]y,
- I[right hand, left hand]
- A[obj1],
- I[right hand, obj1],
- I[left hand, obj1]
- D[obj1, center]x, D[obj1, center]y
- ]
- """
- feature_vec = obj_det2d_set_to_feature_by_method(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- label_to_ind,
- use_activation=True,
- use_center_dist=True,
- use_intersection=True,
- )
+ I[right hand, left hand],
+ A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], D[obj1_k, center]x, D[obj1_k, center]y ... , D[objN_k, center]y
+ ]
+ """
+ options[3] = {
+ "use_activation": True,
+ "use_center_dist": True,
+ "use_intersection": True,
+ }
- elif version == 5:
- """
- Feature vector that encodes the distance of each object from each hand,
- the intersection of each object to the hands,
- and the activation features
+ """
+ Feature vector that encodes the distance of each object from each hand,
+ the intersection of each object to the hands,
+ and the activation features
- Len: 1 + ((N-2)*2) + 1 + ((N-2)*2) + 2 + 1 + (3 * (N-2)), where N is the number of object classes
+ Len:
+ top_k_objects * (
+ 1 + 2 * (num_obj_classes-2) + 1 + 2 * (num_obj_classes-2) + 2 + 1 + (1 + 1 + 1) * (num_obj_classes-2)
+ )
+
+ [
+ for k_obj in top_k_object:
+ A[right hand],
+ D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y,
+ A[left hand],
+ D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y,
+ D[right hand, left hand]x, D[right hand, left hand]y,
+ I[right hand, left hand],
+ A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], ... , I[left hand, objN_k]
+ ]
+ """
+ options[5] = {
+ "use_activation": True,
+ "use_hand_dist": True,
+ "use_intersection": True,
+ }
- [
+ """
+ Feature vector that encodes the distance of each object from each hand,
+ the intersection of each object to the hands,
+ the distance from the center of the hands to each patient joint,
+ and the distance from the center of each object to each patient joint,
+ and the activation features
+
+ Len:
+ top_k_objects * (
+ (1 + (num_obj_classes-2)*2) * 2 + 2 + 1
+ + (num_obj_classes-2) * (1+1+1)
+ )
+ + 22*2 + 22*2
+ + top_k_objects * ((22*2)*(num_obj_classes-2))
+
+
+ [
+ for k_obj in top_k_object:
A[right hand],
- D[right hand, obj1]x, D[right hand, obj1]y, ... , D[right hand, objN]y,
+ D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y,
A[left hand],
- D[left hand, obj1]x, D[left hand, obj1]y, ... , D[left hand, objN]y,
+ D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y,
D[right hand, left hand]x, D[right hand, left hand]y,
- I[right hand, left hand]
- A[obj1] I[right hand, obj1] I[left hand, obj1], ... , I[left hand, objN]
- ]
- """
- feature_vec = obj_det2d_set_to_feature_by_method(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- label_to_ind,
- use_activation=True,
- use_hand_dist=True,
- use_intersection=True,
- )
- elif version == 6:
- feature_vec = obj_det2d_set_to_feature_by_method(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- label_to_ind,
- use_activation=True,
- use_hand_dist=True,
- use_intersection=True,
- use_joint_hand_offset=True,
- use_joint_object_offset=True,
- top_n_objects=top_n_objects,
- )
- else:
- raise NotImplementedError(f"Unhandled version '{version}'")
+ I[right hand, left hand],
+ A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], ... , I[left hand, objN_k],
+ D[left hand, joint1]x, ... , D[left hand, joint 22]y,
+ D[right hand, joint1]x, ... , D[right hand, joint 22]y,
+ for k_obj in top_k_object:
+ D[obj1_k, joint1]x, ... , D[obj1_k, joint22]y,
+ ...,
+ D[objN_k, joint1]x, ... , D[objN_k, joint22]y
+ ]
+ """
+ options[6] = {
+ "use_activation": True,
+ "use_hand_dist": True,
+ "use_intersection": True,
+ "use_joint_hand_offset": True,
+ "use_joint_object_offset": True,
+ }
+
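+    # For example, feature_version_to_options(2) returns
+    # {"use_activation": True, "use_hand_dist": True}; an unhandled
+    # version number raises a KeyError below.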
+ return options[feature_version]
+
+
+def obj_det2d_set_to_feature(
+ label_vec: List[str],
+ xs: List[float],
+ ys: List[float],
+ ws: List[float],
+ hs: List[float],
+ label_confidences: List[float],
+ pose_keypoints: List[Dict],
+ obj_label_to_ind: Dict[str, int],
+ version: int = 1,
+ top_k_objects: int = 1,
+):
+ """Convert ObjectDetection2dSet fields into a feature vector.
+
+ :param label_vec: List of object labels for each detection (length: # detections)
+ :param xs: List of x values for each detection (length: # detections)
+ :param ys: List of y values for each detection (length: # detections)
+ :param ws: List of width values for each detection (length: # detections)
+ :param hs: List of height values for each detection (length: # detections)
+ :param label_confidences: List of confidence values for each detection (length: # detections)
+    :param pose_keypoints:
+        List of joints, represented by a dictionary containing the x and y coordinates of the points and the category id and string
+    :param obj_label_to_ind:
+        Dictionary mapping a label string to its index within the feature vector.
+    :param version:
+        Version of the feature conversion approach.
+    :param top_k_objects: Number of top-confidence objects to use per label, defaults to 1
+
+ :return: resulting feature data
+ """
+ opts = feature_version_to_options(version)
+ feature_vec = obj_det2d_set_to_feature_by_method(
+ label_vec,
+ xs,
+ ys,
+ ws,
+ hs,
+ label_confidences,
+ pose_keypoints,
+ obj_label_to_ind,
+ top_k_objects=top_k_objects,
+ **opts,
+ )
# print(f"feat {feature_vec}")
# print(len(feature_vec))
return feature_vec
-def obj_det2d_set_to_feature_by_method(
- label_vec,
- xs,
- ys,
- ws,
- hs,
- label_confidences,
- label_to_ind: Dict[str, int],
- use_activation=False,
- use_hand_dist=False,
- use_center_dist=False,
- use_intersection=False,
- use_joint_hand_offset=False,
- use_joint_object_offset=False,
- top_n_objects=3,
+def plot_feature_vec(
+ image_fn: str,
+ right_hand_center: list,
+ left_hand_center: list,
+ feature_vec: np.array,
+ obj_label_to_ind: Dict[str, int],
+ output_dir: str,
+ top_k_objects: int = 1,
+ use_activation: bool = False,
+ use_hand_dist: bool = False,
+ use_center_dist: bool = False,
+ use_intersection: bool = False,
+ use_joint_hand_offset: bool = False,
+ use_joint_object_offset: bool = False,
+ joint_names: List[str] = [
+ "nose",
+ "mouth",
+ "throat",
+ "chest",
+ "stomach",
+ "left_upper_arm",
+ "right_upper_arm",
+ "left_lower_arm",
+ "right_lower_arm",
+ "left_wrist",
+ "right_wrist",
+ "left_hand",
+ "right_hand",
+ "left_upper_leg",
+ "right_upper_leg",
+ "left_knee",
+ "right_knee",
+ "left_lower_leg",
+ "right_lower_leg",
+ "left_foot",
+ "right_foot",
+ "back",
+ ],
+ colors: List[str] = [
+ "yellow",
+ "red",
+ "green",
+ "lightblue",
+ "blue",
+ "purple",
+ "orange",
+ ],
):
+ """Plot the object and joint points based on the hand bbox centers and the distance values
+ in the feature vector
+
+ :param image_fn: Path to the image to draw on
+ :param right_hand_center: List of the x and y coordinates of the right hand box center
+ :param left_hand_center: List of the x and y coordinates of the left hand box center
+ :param feature_vec: Numpy array of values determined by the provided flags
+    :param obj_label_to_ind:
+        Dictionary mapping a label string to its index within the feature vector.
+    :param output_dir: Path to a folder to save the generated images to
+    :param top_k_objects: Number of top-confidence objects to use per label, defaults to 1
+    :param use_activation: If True, add the confidence values of the detections to the feature vector, defaults to False
+    :param use_hand_dist: If True, add the distance of the detection centers to both hand centers to the feature vector, defaults to False
+    :param use_center_dist: If True, add the distance of the detection centers to the image center to the feature vector, defaults to False
+    :param use_intersection: If True, add the intersection of the detection boxes with the hand boxes to the feature vector, defaults to False
+    :param use_joint_hand_offset: If True, add the distance of the hand centers to the patient joints to the feature vector, defaults to False
+    :param use_joint_object_offset: If True, add the distance of the object centers to the patient joints to the feature vector, defaults to False
+ :param joint_names: List of the joint names
+ :param colors: List of colors to use when plotting points
"""
- `label_vec`, `xs`, `ys`, `ws`, hs` are to all be parallel in association
- and describe the object detections to create an embedding from.
-
- :param label_vec: Object label of the most confident class for each
- detection.
- :param xs: Upper-left X coordinate for each detection.
- :param ys: Upper-left Y coordinate for each detection.
- :param ws: Pixel width for each detection.
- :param hs: Pixel height for each detection.
- :param label_confidences: Confidence value of the most confident class for
- each detection.
- :param label_to_ind: Mapping of detection class indices
- :param use_activation:
- :param use_hand_dist:
- :param use_center_dist:
- :param use_intersection:
-
- :return: Feature vector embedding of the input detections.
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+ rh_joint_dists = []
+ lh_joint_dists = []
+ rh_dists_k = [[] for i in range(top_k_objects)]
+ lh_dists_k = [[] for i in range(top_k_objects)]
+ obj_confs_k = [[] for i in range(top_k_objects)]
+ obj_im_center_dists_k = [[] for i in range(top_k_objects)]
+ obj_joint_dists_k = [[] for i in range(top_k_objects)]
+
+ non_object_labels = ["hand (left)", "hand (right)", "user", "patient"]
+ labels = sorted(obj_label_to_ind)
+ for non_obj_label in non_object_labels:
+ labels.remove(non_obj_label)
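+    # Note: the removals above assume every non-object label is present in
+    # obj_label_to_ind.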
+
+ ind = -1
+ for object_k_index in range(top_k_objects):
+ # RIGHT HAND
+ if use_activation:
+ ind += 1
+ right_hand_conf = feature_vec[ind]
+
+ if use_hand_dist:
+ for obj_label in labels:
+ ind += 1
+ obj_rh_dist_x = feature_vec[ind]
+ ind += 1
+ obj_rh_dist_y = feature_vec[ind]
+
+ rh_dists_k[object_k_index].append([obj_rh_dist_x, obj_rh_dist_y])
+
+ if use_center_dist:
+ ind += 1
+ rh_im_center_dist_x = feature_vec[ind]
+ ind += 1
+ rh_im_center_dist_y = feature_vec[ind]
+
+ # LEFT HAND
+ if use_activation:
+ ind += 1
+ left_hand_conf = feature_vec[ind]
+
+ if use_hand_dist:
+ # Left hand distances
+ for obj_label in labels:
+ ind += 1
+ obj_lh_dist_x = feature_vec[ind]
+ ind += 1
+ obj_lh_dist_y = feature_vec[ind]
+
+ lh_dists_k[object_k_index].append([obj_lh_dist_x, obj_lh_dist_y])
+
+ if use_center_dist:
+ ind += 1
+ lh_im_center_dist_x = feature_vec[ind]
+ ind += 1
+ lh_im_center_dist_y = feature_vec[ind]
+
+ # Right - left hand
+ if use_hand_dist:
+ # Right - left hand distance
+ ind += 1
+ rh_lh_dist_x = feature_vec[ind]
+ ind += 1
+ rh_lh_dist_y = feature_vec[ind]
+ if use_intersection:
+ ind += 1
+ lh_rh_intersect = feature_vec[ind]
+
+ # OBJECTS
+ for obj_label in labels:
+ if use_activation:
+ # Object confidence
+ ind += 1
+ obj_conf = feature_vec[ind]
+
+ obj_confs_k[object_k_index].append(obj_conf)
+
+ if use_intersection:
+ # obj - right hand intersection
+ ind += 1
+ obj_rh_intersect = feature_vec[ind]
+ # obj - left hand intersection
+ ind += 1
+ obj_lh_intersect = feature_vec[ind]
+
+ if use_center_dist:
+ # image center - obj distances
+ ind += 1
+ obj_im_center_dist_x = feature_vec[ind]
+ ind += 1
+ obj_im_center_dist_y = feature_vec[ind]
+
+ obj_im_center_dists_k[object_k_index].append(
+ [obj_im_center_dist_x, obj_im_center_dist_y]
+ )
+
+ # HANDS-JOINTS
+ if use_joint_hand_offset:
+ # left hand - joints distances
+ for i in range(22):
+ ind += 1
+ lh_jointi_dist_x = feature_vec[ind]
+ ind += 1
+ lh_jointi_dist_y = feature_vec[ind]
+
+ lh_joint_dists.append([lh_jointi_dist_x, lh_jointi_dist_y])
+
+ # right hand - joints distances
+ for i in range(22):
+ ind += 1
+ rh_jointi_dist_x = feature_vec[ind]
+ ind += 1
+ rh_jointi_dist_y = feature_vec[ind]
+
+ rh_joint_dists.append([rh_jointi_dist_x, rh_jointi_dist_y])
+
+ # OBJS-JOINTS
+ if use_joint_object_offset:
+ for object_k_index in range(top_k_objects):
+ # obj - joints distances
+ for obj_label in labels:
+ joints_dists = []
+ for i in range(22):
+ ind += 1
+ obj_jointi_dist_x = feature_vec[ind]
+ ind += 1
+ obj_jointi_dist_y = feature_vec[ind]
+
+ joints_dists.append([obj_jointi_dist_x, obj_jointi_dist_y])
+
+ obj_joint_dists_k[object_k_index].append(joints_dists)
+
+ # Draw
+ fig, (
+ (lh_dist_ax, rh_dist_ax),
+ (im_center_dist_ax, obj_joint_dist_ax),
+ (lh_joint_dist_ax, rh_joint_dist_ax),
+ ) = plt.subplots(3, 2, figsize=(15, 15))
+ axes = [
+ rh_dist_ax,
+ lh_dist_ax,
+ im_center_dist_ax,
+ obj_joint_dist_ax,
+ rh_joint_dist_ax,
+ lh_joint_dist_ax,
+ ]
+ flags = [
+ use_hand_dist,
+ use_hand_dist,
+ use_center_dist,
+ use_joint_object_offset,
+ use_joint_hand_offset,
+ use_joint_hand_offset,
+ ]
+
+ rh_dist_ax.set_title("Objects from distance to right hand")
+ lh_dist_ax.set_title("Objects from distance to left hand")
+ im_center_dist_ax.set_title("Objects from distance to image center")
+ obj_joint_dist_ax.set_title("Joints from distance to objects*")
+ rh_joint_dist_ax.set_title("Joints from distance to right hand")
+ lh_joint_dist_ax.set_title("Joints from distance to left hand")
+
+ rh_dist_color = colors[2]
+ lh_dist_color = colors[3]
+ obj_im_center_dist_color = colors[4]
+ lh_joint_color = colors[5]
+ rh_joint_color = colors[6]
+
+ image = Image.open(image_fn)
+ image = np.array(image)
+
+ # Default values for each plot
+ for ax, flag in zip(axes, flags):
+ if not flag:
+ continue
+
+ ax.imshow(image)
+
+ ax.plot(right_hand_center[0], right_hand_center[1], color=colors[0], marker="o")
+ ax.annotate(
+ f"hand (right): {round(right_hand_conf, 2)}",
+ right_hand_center,
+ color="black",
+ annotation_clip=False,
+ )
+
+ ax.plot(left_hand_center[0], left_hand_center[1], color=colors[1], marker="o")
+ ax.annotate(
+ f"hand (left): {round(left_hand_conf, 2)}",
+ left_hand_center,
+ color="black",
+ annotation_clip=False,
+ )
+
+ def draw_points_by_distance(ax, distances, pt, color, labels, confs):
+ # Make sure the reference point exists
+ if pt == default_center_list:
+ return
+
+ for i, dist in enumerate(distances):
+ # Make sure the object point exists
+ if dist == list(default_dist):
+ continue
+
+ obj_pt = [pt[0] - dist[0], pt[1] - dist[1]] # pt - obj_pt = dist
+
+ ax.plot([pt[0], obj_pt[0]], [pt[1], obj_pt[1]], color=color, marker="o")
+ ax.annotate(
+ f"{labels[i]}: {round(confs[i], 2)}",
+ obj_pt,
+ color="black",
+ annotation_clip=False,
+ )
+
+ if use_joint_hand_offset:
+ draw_points_by_distance(
+ rh_joint_dist_ax,
+ rh_joint_dists,
+ right_hand_center,
+ rh_joint_color,
+ joint_names,
+ [1] * len(joint_names),
+ )
+ draw_points_by_distance(
+ lh_joint_dist_ax,
+ lh_joint_dists,
+ left_hand_center,
+ lh_joint_color,
+ joint_names,
+ [1] * len(joint_names),
+ )
+
+ if use_hand_dist:
+ rh_dist_ax.plot(
+ [right_hand_center[0], right_hand_center[0] - rh_lh_dist_x],
+ [right_hand_center[1], right_hand_center[1] - rh_lh_dist_y],
+ color=random_colors[0],
+ marker="o",
+ )
+
+ for object_k_index in range(top_k_objects):
+ if use_hand_dist:
+ draw_points_by_distance(
+ rh_dist_ax,
+ rh_dists_k[object_k_index],
+ right_hand_center,
+ rh_dist_color,
+ labels,
+ obj_confs_k[object_k_index],
+ )
+ draw_points_by_distance(
+ lh_dist_ax,
+ lh_dists_k[object_k_index],
+ left_hand_center,
+ lh_dist_color,
+ labels,
+ obj_confs_k[object_k_index],
+ )
+
+ if use_center_dist:
+ image_center = [1280 // 2, 720 // 2]
+            im_center_dist_ax.plot(
+                image_center[0], image_center[1], color=colors[1], marker="o"
+            )
+ im_center_dist_ax.annotate(
+ "image_center", image_center, color="black", annotation_clip=False
+ )
+ draw_points_by_distance(
+ im_center_dist_ax,
+ obj_im_center_dists_k[object_k_index],
+ image_center,
+ obj_im_center_dist_color,
+ labels,
+ obj_confs_k[object_k_index],
+ )
+
+ if use_joint_object_offset:
+
+ obj_pts = []
+ if use_hand_dist:
+ if right_hand_center != default_center_list:
+ obj_pts = [
+ (
+ [
+ right_hand_center[0] - rh_dist[0],
+ right_hand_center[1] - rh_dist[1],
+ ]
+ if rh_dist != list(default_dist)
+ else default_center_list
+ )
+ for rh_dist in rh_dists_k[object_k_index]
+ ]
+ elif left_hand_center != default_center_list:
+ obj_pts = [
+ (
+ [
+ left_hand_center[0] - lh_dist[0],
+ left_hand_center[1] - lh_dist[1],
+ ]
+ if lh_dist != list(default_dist)
+ else default_center_list
+ )
+ for lh_dist in lh_dists_k[object_k_index]
+ ]
+ elif use_center_dist:
+ obj_pts = [
+ (
+ [
+ image_center[0] - im_center_dist[0],
+ image_center[1] - im_center_dist[1],
+ ]
+ if im_center_dist != list(default_dist)
+ else default_center_list
+ )
+ for im_center_dist in obj_im_center_dists_k[object_k_index]
+ ]
+
+ if not obj_pts:
+ continue
+
+ for i, obj_pt in enumerate(obj_pts):
+ if obj_pt == default_center_list:
+ continue
+
+ obj_joint_color = random_colors[(object_k_index * len(obj_pt)) + i]
+ obj_joint_dist_ax.plot(
+ obj_pt[0], obj_pt[1], color=obj_joint_color, marker="o"
+ )
+ obj_joint_dist_ax.annotate(
+ f"{labels[i]}: {round(obj_confs_k[object_k_index][i], 2)}",
+ obj_pt,
+ color="black",
+ annotation_clip=False,
+ )
+ draw_points_by_distance(
+ obj_joint_dist_ax,
+ obj_joint_dists_k[object_k_index][i],
+ obj_pt,
+ obj_joint_color,
+ joint_names,
+ [1] * len(joint_names),
+ )
+
+ Path(f"{output_dir}/full_feature_vec").mkdir(parents=True, exist_ok=True)
+ plt.savefig(f"{output_dir}/full_feature_vec/{os.path.basename(image_fn)}")
+
+ def copy_ax_to_new_fig(ax, subfolder):
+ ax.remove()
+
+ fig2 = plt.figure(figsize=(15, 15))
+ ax.figure = fig2
+ fig2.axes.append(ax)
+ fig2.add_axes(ax)
+
+ dummy = fig2.add_subplot(111)
+ ax.set_position(dummy.get_position())
+ dummy.remove()
+
+ Path(f"{output_dir}/{subfolder}").mkdir(parents=True, exist_ok=True)
+ plt.savefig(f"{output_dir}/{subfolder}/{os.path.basename(image_fn)}")
+
+ plt.close(fig2)
+
+ # Save each subplot as its own image
+ for ax, subfolder, flag in zip(
+ [
+ lh_dist_ax,
+ rh_dist_ax,
+ im_center_dist_ax,
+ obj_joint_dist_ax,
+ lh_joint_dist_ax,
+ rh_joint_dist_ax,
+ ],
+ [
+ "left_hand_obj_dist",
+ "right_hand_obj_dist",
+ "image_center_obj_dist",
+ "obj_joints_dist",
+ "left_hand_joints_dist",
+ "right_hand_joints_dist",
+ ],
+ flags,
+ ):
+ if not flag:
+ continue
+ copy_ax_to_new_fig(ax, subfolder)
+
+ plt.close(fig)
+
+
+def obj_det2d_set_to_feature_by_method(
+ label_vec: List[str],
+ xs: List[float],
+ ys: List[float],
+ ws: List[float],
+ hs: List[float],
+ label_confidences: List[float],
+ pose_keypoints: List[Dict],
+ obj_label_to_ind: Dict[str, int],
+ top_k_objects: int = 1,
+ use_activation: bool = False,
+ use_hand_dist: bool = False,
+ use_center_dist: bool = False,
+ use_intersection: bool = False,
+ use_joint_hand_offset: bool = False,
+ use_joint_object_offset: bool = False,
+):
"""
- #########################
- # Default values
- #########################
- default_dist = (0, 0) # (1280 * 2, 720 * 2)
- default_center_dist = (0, 0) # (1280, 720)
- default_bbox = [0, 0, 0, 0] # [0, 0, 1280, 720]
- default_center = ([[0]], [[0]]) # kwimage.Boxes([default_bbox], "xywh").center
- width, height = 1280, 720
- image_center = width // 2
+ :param label_vec: List of object labels for each detection (length: # detections)
+ :param xs: List of x values for each detection (length: # detections)
+ :param ys: List of y values for each detection (length: # detections)
+ :param ws: List of width values for each detection (length: # detections)
+ :param hs: List of height values for each detection (length: # detections)
+ :param label_confidences: List of confidence values for each detection (length: # detections)
+    :param pose_keypoints:
+        List of joints, represented by a dictionary containing the x and y coordinates of the points and the category id and string
+    :param obj_label_to_ind:
+        Dictionary mapping a label string to its index within the feature vector.
+    :param top_k_objects: Number of top-confidence objects to use per label, defaults to 1
+    :param use_activation: If True, add the confidence values of the detections to the feature vector, defaults to False
+    :param use_hand_dist: If True, add the distance of the detection centers to both hand centers to the feature vector, defaults to False
+    :param use_center_dist: If True, add the distance of the detection centers to the image center to the feature vector, defaults to False
+    :param use_intersection: If True, add the intersection of the detection boxes with the hand boxes to the feature vector, defaults to False
+    :param use_joint_hand_offset: If True, add the distance of the hand centers to the patient joints to the feature vector, defaults to False
+    :param use_joint_object_offset: If True, add the distance of the object centers to the patient joints to the feature vector, defaults to False
+ :return:
+ resulting feature data
+ """
#########################
# Data
#########################
# Number of object detection classes
- hand_labels = ["hand (right)", "hand (left)", "hand", "hands"]
- non_objects_labels = ["patient", "user"]
- remove_classes_count = [
- 1 for label in non_objects_labels if label in label_to_ind.keys()
- ]
- num_det_classes = len(label_to_ind) - len(
- remove_classes_count
- ) # accomedate 2 hands instead of 1, accomedate top 3 objects
+ num_det_classes = len(obj_label_to_ind)
- det_class_max_conf = np.zeros((num_det_classes, top_n_objects))
+    # Maximum confidence observed per-class across input object detections.
+ # If a class has not been observed, it is set to 0 confidence.
+ det_class_max_conf = np.zeros((num_det_classes, top_k_objects))
# The bounding box of the maximally confident detection
- det_class_bbox = np.zeros((top_n_objects, num_det_classes, 4), dtype=np.float64)
+ det_class_bbox = np.zeros((top_k_objects, num_det_classes, 4), dtype=np.float64)
det_class_bbox[:] = default_bbox
# Binary mask indicate which detection classes are present on this frame.
- det_class_mask = np.zeros(num_det_classes, dtype=np.bool_)
+ det_class_mask = np.zeros((top_k_objects, num_det_classes), dtype=np.bool_)
- # print(f"label vec: {label_vec}")
+    # Record the top_k most confident detections (confidence & bbox) for each
+    # object class listed in `obj_label_to_ind`
for i, label in enumerate(label_vec):
- if label in label_to_ind:
- conf = label_confidences[i]
- ind = label_to_ind[label]
- det_class_mask[ind] = True
- conf_list = det_class_max_conf[ind, :]
- if conf > det_class_max_conf[ind].min():
- first_zero = np.where(conf_list == conf_list.min()) # [0][0]
- first_zero = first_zero[0][0]
- conf_list[first_zero] = conf
- obj_index = np.where(conf_list == conf)
-
- det_class_max_conf[ind] = conf_list
- det_class_bbox[obj_index, ind] = [xs[i], ys[i], ws[i], hs[i]] # xywh
+ assert label in obj_label_to_ind, f"Label {label} is unknown"
+
+ conf = label_confidences[i]
+ ind = obj_label_to_ind[label]
+
+ conf_list = det_class_max_conf[ind, :]
+ if conf > det_class_max_conf[ind].min():
+ # Replace the lowest confidence object with our new higher confidence object
+ min_conf_ind = np.where(conf_list == conf_list.min())[0][0]
+
+ conf_list[min_conf_ind] = conf
+ det_class_bbox[min_conf_ind, ind] = [xs[i], ys[i], ws[i], hs[i]]
+ det_class_mask[min_conf_ind, ind] = True
+
+ # Sort the confidences to determine the top_k order
+ sorted_index = np.argsort(conf_list)[::-1]
+ sorted_conf_list = np.array([conf_list[k] for k in sorted_index])
+
+ # Reorder the values to match the confidence top_k order
+ det_class_max_conf[ind] = sorted_conf_list
+
+ bboxes = det_class_bbox.copy()
+ mask = det_class_mask.copy()
+ for idx, sorted_ind in enumerate(sorted_index):
+ det_class_bbox[idx, ind] = bboxes[sorted_ind, ind]
+ det_class_mask[idx, ind] = mask[sorted_ind, ind]
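+        # det_class_max_conf[ind] now holds this label's top_k confidences in
+        # descending order, with det_class_bbox/det_class_mask reordered to match.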
det_class_kwboxes = kwimage.Boxes(det_class_bbox, "xywh")
@@ -284,7 +796,7 @@ def obj_det2d_set_to_feature_by_method(
# util functions
#########################
def find_hand(hand_str):
- hand_idx = label_to_ind[hand_str]
+ hand_idx = obj_label_to_ind[hand_str]
hand_conf = det_class_max_conf[hand_idx][0]
hand_bbox = kwimage.Boxes([det_class_bbox[0, hand_idx]], "xywh")
@@ -292,8 +804,8 @@ def find_hand(hand_str):
def dist_to_center(center1, center2):
center_dist = [
- center2[0][0][0] - center1[0][0][0],
- center2[1][0][0] - center1[1][0][0],
+ center1[0][0][0] - center2[0][0][0],
+ center1[1][0][0] - center2[1][0][0],
]
return center_dist
@@ -310,83 +822,114 @@ def dist_to_center(center1, center2):
"hand (left)"
)
- RIGHT_IDX = 0
- LEFT_IDX = 1
right_left_hand_kwboxes = det_class_kwboxes[0, [right_hand_idx, left_hand_idx]]
+
# Mask detailing hand presence in the scene.
- hand_mask = det_class_mask[[right_hand_idx, left_hand_idx]]
- # 2-D mask object class gate per hand
- hand_by_object_mask = np.dot(hand_mask[:, None], det_class_mask[None, :])
+ RIGHT_IDX = 0
+ LEFT_IDX = 1
+ hand_mask = [det_class_mask[0][right_hand_idx], det_class_mask[0][left_hand_idx]]
+ # Mask detailing hand and object presence in the scene.
+ hand_by_object_mask_k = np.zeros(
+ (top_k_objects, 2, num_det_classes), dtype=np.bool_
+ )
+
+ for object_k_index in range(top_k_objects):
+ x = np.array(
+ [
+ [
+ hand_mask[RIGHT_IDX] and det_class
+ for det_class in det_class_mask[object_k_index]
+ ],
+ [
+ hand_mask[LEFT_IDX] and det_class
+ for det_class in det_class_mask[object_k_index]
+ ],
+ ]
+ )
+ hand_by_object_mask_k[object_k_index] = x
#########################
- # Distances
+ # Hand distances
#########################
if use_hand_dist:
# Compute distances to the right and left hands. Distance to the hand
# is defined by `hand.center - object.center`.
# `kwcoco.Boxes.center` returns a tuple of two arrays, each shaped
# [n_boxes, 1].
-
all_obj_centers_x, all_obj_centers_y = det_class_kwboxes.center # [n_dets, 1]
hand_centers_x, hand_centers_y = right_left_hand_kwboxes.center # [2, 1]
- # Hand distances from objects. Shape: [2, n_dets]
- right_hand_dist_n = np.zeros(
- (top_n_objects, hand_by_object_mask.shape[1], hand_by_object_mask.shape[0])
- )
- left_hand_dist_n = np.zeros(
- (top_n_objects, hand_by_object_mask.shape[1], hand_by_object_mask.shape[0])
- )
- # print(f"left_hand_dist_n: {left_hand_dist_n.shape}")
- for object_index in range(top_n_objects):
- obj_centers_x = all_obj_centers_x[object_index]
- obj_centers_y = all_obj_centers_y[object_index] # [n_dets, 1]
+
+ # Hand distances from objects. Shape: [top_k, n_dets, 2]
+ right_hand_dist_k = np.zeros((top_k_objects, num_det_classes, 2))
+ left_hand_dist_k = np.zeros((top_k_objects, num_det_classes, 2))
+ for object_k_index in range(top_k_objects):
+ obj_centers_x = all_obj_centers_x[object_k_index]
+ obj_centers_y = all_obj_centers_y[object_k_index]
hand_dist_x = np.subtract(
hand_centers_x,
obj_centers_x.T,
- where=hand_by_object_mask,
+ where=hand_by_object_mask_k[object_k_index],
# required, otherwise indices may be left uninitialized.
- out=np.zeros(shape=hand_by_object_mask.shape),
+ out=np.zeros(shape=(2, num_det_classes)),
)
hand_dist_y = np.subtract(
hand_centers_y,
obj_centers_y.T,
- where=hand_by_object_mask,
+ where=hand_by_object_mask_k[object_k_index],
# required, otherwise indices may be left uninitialized.
- out=np.zeros(shape=hand_by_object_mask.shape),
+ out=np.zeros(shape=(2, num_det_classes)),
)
+
# Collate into arrays of (x, y) coordinates.
right_hand_dist = np.stack(
[hand_dist_x[RIGHT_IDX], hand_dist_y[RIGHT_IDX]], axis=1
)
left_hand_dist = np.stack(
[hand_dist_x[LEFT_IDX], hand_dist_y[LEFT_IDX]], axis=1
)
- right_hand_dist_n[object_index] = right_hand_dist
- left_hand_dist_n[object_index] = left_hand_dist
+ right_hand_dist_k[object_k_index] = right_hand_dist
+ left_hand_dist_k[object_k_index] = left_hand_dist
else:
- right_hand_dist = left_hand_dist = None
+ right_hand_dist_k = left_hand_dist_k = None
+ #########################
+ # Image center
+ # distances
+ #########################
if use_center_dist:
- image_center = kwimage.Boxes([default_bbox], "xywh").center
+ image_center = kwimage.Boxes(
+            [[0, 0, 1280, 720]], "xywh"
+ ).center # Hard coded image size
default_center_dist = [image_center[0][0][0] * 2, image_center[1][0][0] * 2]
- distances_to_center = []
- for i in range(num_det_classes):
- obj_conf = det_class_max_conf[i]
+ # Object distances from image center. Shape: [top_k, n_dets, 2]
+ image_center_obj_dist_k = np.zeros((top_k_objects, num_det_classes, 2))
+ for object_k_index in range(top_k_objects):
+ obj_centers_x = all_obj_centers_x[object_k_index]
+ obj_centers_y = all_obj_centers_y[object_k_index]
- obj_bbox = kwimage.Boxes([det_class_bbox[i]], "xywh")
- obj_center = obj_bbox.center
+ for obj_ind in range(num_det_classes):
+ obj_conf = det_class_max_conf[obj_ind]
- center_dist = (
- dist_to_center(image_center, obj_center)
- if obj_conf != 0
- else default_center_dist
- )
+ obj_bbox = kwimage.Boxes(
+ [det_class_bbox[object_k_index][obj_ind]], "xywh"
+ )
+ obj_center = obj_bbox.center
+
+ center_dist = (
+ dist_to_center(image_center, obj_center)
+ if obj_conf != 0
+ else default_center_dist
+ )
- distances_to_center.append(center_dist)
+ image_center_obj_dist_k[object_k_index][obj_ind] = center_dist
+ else:
+ image_center_obj_dist_k = None
#########################
# Intersection
@@ -397,17 +940,13 @@ def dist_to_center(center1, center2):
# intersected by the representative object bounding-box.
# If a hand or object is not present in the scene, then their
# respective intersection area is 0.
- right_hand_intersection_n = np.zeros(
- (top_n_objects, hand_by_object_mask.shape[1])
- )
- left_hand_intersection_n = np.zeros(
- (top_n_objects, hand_by_object_mask.shape[1])
- )
- for object_index in range(top_n_objects):
+ # Shape: [top_k, n_dets]
+ right_hand_intersection_k = np.zeros((top_k_objects, num_det_classes))
+ left_hand_intersection_k = np.zeros((top_k_objects, num_det_classes))
+ for object_k_index in range(top_k_objects):
+ obj_bboxes = det_class_kwboxes[object_k_index]
- hand_obj_intersection_vol = right_left_hand_kwboxes.isect_area(
- det_class_kwboxes[object_index]
- )
+ hand_obj_intersection_vol = right_left_hand_kwboxes.isect_area(obj_bboxes)
right_left_hand_area = right_left_hand_kwboxes.area
# Handling avoiding div-by-zero using the `where` parameter.
@@ -419,32 +958,73 @@ def dist_to_center(center1, center2):
# indices where `right_left_hand_area == 0`.
out=np.zeros_like(hand_obj_intersection_vol),
)
-
right_hand_intersection = hand_obj_intersection[0]
left_hand_intersection = hand_obj_intersection[1]
- right_hand_intersection_n[object_index] = right_hand_intersection
- left_hand_intersection_n[object_index] = left_hand_intersection
-
+ right_hand_intersection_k[object_k_index] = right_hand_intersection
+ left_hand_intersection_k[object_k_index] = left_hand_intersection
else:
- right_hand_intersection = left_hand_intersection = None
+ right_hand_intersection_k = left_hand_intersection_k = None
#########################
- # Feature vector
+ # Joints
#########################
- feature_vec = []
- # Add hand data
- for object_index in range(top_n_objects):
+ def calc_joint_offset(bbox_center_x, bbox_center_y):
+ offset_vector = []
+ if pose_keypoints == zero_joint_offset or (
+ bbox_center_x == default_center_list[0]
+ and bbox_center_y == default_center_list[1]
+ ):
+ # If we don't have the joints or the object, return default values
+ for joint in pose_keypoints:
+ offset_vector.append(default_dist)
+ return offset_vector
+
+ for joint in pose_keypoints:
+ jx, jy = joint["xy"]
+ joint_point = [jx, jy]
+
+ dist = [bbox_center_x - joint_point[0], bbox_center_y - joint_point[1]]
+ offset_vector.append(dist)
+
+ return offset_vector
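+    # calc_joint_offset returns one (dx, dy) pair per pose joint, or
+    # default_dist placeholders when the pose or the reference point is missing.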
+
+ # HAND - JOINTS
+ if use_joint_hand_offset:
+ joint_right_hand_offset = calc_joint_offset(
+ right_hand_center[0][0][0], right_hand_center[1][0][0]
+ )
+ joint_left_hand_offset = calc_joint_offset(
+ left_hand_center[0][0][0], left_hand_center[1][0][0]
+ )
+
+ # OBJECTS - JOINTS
+ if use_joint_object_offset:
+ # Object distances from patient joints. Shape: [top_k, n_dets, 22, 2]
+ obj_joints_dist_k = np.zeros((top_k_objects, num_det_classes, 22, 2))
+ for object_k_index in range(top_k_objects):
+ obj_centers_x = all_obj_centers_x[object_k_index]
+ obj_centers_y = all_obj_centers_y[object_k_index]
- right_hand_dist = right_hand_dist_n[object_index]
- left_hand_dist = left_hand_dist_n[object_index]
+ joint_object_offset = []
+ for obj_ind in range(num_det_classes):
+ offset_vector = calc_joint_offset(
+ obj_centers_x[obj_ind], obj_centers_y[obj_ind]
+ )
+ joint_object_offset.append(offset_vector)
- right_hand_intersection = right_hand_intersection_n[object_index]
- left_hand_intersection = left_hand_intersection_n[object_index]
+ obj_joints_dist_k[object_k_index] = joint_object_offset
+
+ #########################
+ # Feature vector
+ #########################
+ feature_vec = []
- for hand_conf, hand_idx, hand_dist, hand_intersection in [
- (right_hand_conf, right_hand_idx, right_hand_dist, right_hand_intersection),
- (left_hand_conf, left_hand_idx, left_hand_dist, left_hand_intersection),
+ for object_k_index in range(top_k_objects):
+ # HANDS
+ for hand_conf, hand_idx, hand_dist in [
+ (right_hand_conf, right_hand_idx, right_hand_dist_k[object_k_index]),
+ (left_hand_conf, left_hand_idx, left_hand_dist_k[object_k_index]),
]:
if use_activation:
feature_vec.append([hand_conf])
@@ -457,32 +1037,47 @@ def dist_to_center(center1, center2):
]
feature_vec.append(hd1)
if use_center_dist:
- feature_vec.append(distances_to_center[hand_idx])
-
- # print(f"top-N objects feature_vec: {len(feature_vec)}")
- # Add distance and intersection between hands.
- # This is already there since the hands are in dets_class
+ feature_vec.append(image_center_obj_dist_k[0][hand_idx])
+ # RIGHT-LEFT HAND
if use_hand_dist:
- feature_vec.append(right_hand_dist[left_hand_idx])
-
+ feature_vec.append(right_hand_dist_k[0][left_hand_idx])
if use_intersection:
- feature_vec.append([right_hand_intersection[left_hand_idx]])
+ feature_vec.append([right_hand_intersection_k[0][left_hand_idx]])
- # Add object data
- for i in range(num_det_classes):
- if i in [right_hand_idx, left_hand_idx]:
+ # OBJECTS
+ for obj_ind in range(num_det_classes):
+ if obj_ind in [right_hand_idx, left_hand_idx]:
# We already have the hand data
continue
if use_activation:
- feature_vec.append([det_class_max_conf[i][object_index]])
+ feature_vec.append([det_class_max_conf[obj_ind][object_k_index]])
if use_intersection:
- feature_vec.append([right_hand_intersection[i]])
- feature_vec.append([left_hand_intersection[i]])
+ feature_vec.append([right_hand_intersection_k[object_k_index][obj_ind]])
+ feature_vec.append([left_hand_intersection_k[object_k_index][obj_ind]])
if use_center_dist:
- feature_vec.append(distances_to_center[i])
+ feature_vec.append(image_center_obj_dist_k[object_k_index][obj_ind])
+
+ # HANDS-JOINTS
+ if use_joint_hand_offset:
+ for lh_offset in joint_left_hand_offset:
+ feature_vec.append(lh_offset)
+
+ for rh_offset in joint_right_hand_offset:
+ feature_vec.append(rh_offset)
+
+ # OBJ-JOINTS
+ if use_joint_object_offset:
+ for object_k_index in range(top_k_objects):
+ for obj_ind in range(num_det_classes):
+ if obj_ind in [right_hand_idx, left_hand_idx]:
+ # We already have the hand data
+ continue
+ for offset in obj_joints_dist_k[object_k_index][obj_ind]:
+ feature_vec.append(offset)
feature_vec = [item for sublist in feature_vec for item in sublist] # flatten
+ feature_vec = np.array(feature_vec, dtype=np.float64)
return feature_vec
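
As a rough, self-contained illustration of the joint-offset features assembled above (a sketch only, not code from this patch; `joint_offsets` and its argument names are hypothetical), each offset is just the (dx, dy) vector from a box center to a pose keypoint:

```python
# Sketch of the joint-offset computation used in the feature vector above.
# For each pose keypoint, record the (dx, dy) vector from a bounding-box
# center to that joint; fall back to a default per-joint value when the pose
# (or the object) is missing. Names here are illustrative, not the patch's API.
def joint_offsets(bbox_center_xy, pose_keypoints, default_dist=(0.0, 0.0), n_joints=22):
    cx, cy = bbox_center_xy
    if not pose_keypoints:
        # No pose for this frame: one default offset per expected joint slot.
        return [list(default_dist) for _ in range(n_joints)]
    return [[cx - kp["xy"][0], cy - kp["xy"][1]] for kp in pose_keypoints]

offsets = joint_offsets((320.0, 240.0), [{"xy": (300.0, 200.0)}, {"xy": (350.0, 260.0)}])
# -> [[20.0, 40.0], [-30.0, -20.0]]
```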
diff --git a/angel_system/data/common/cli/str_to_id_csv.py b/angel_system/data/common/cli/str_to_id_csv.py
index 349af6155..5fc369e4f 100644
--- a/angel_system/data/common/cli/str_to_id_csv.py
+++ b/angel_system/data/common/cli/str_to_id_csv.py
@@ -4,12 +4,7 @@
import csv
import argparse
-from angel_system.data.common.load_data import (
- activities_from_dive_csv,
- objs_as_dataframe,
- time_from_name,
- sanitize_str,
-)
+from angel_system.data.common.load_data import sanitize_str
def str_to_id(activity_config_fn, activity_gt_dir):
diff --git a/angel_system/data/common/create_custom_learn_video_dataset.py b/angel_system/data/common/create_custom_learn_video_dataset.py
index 90e21c6c9..9a8b9db1d 100644
--- a/angel_system/data/common/create_custom_learn_video_dataset.py
+++ b/angel_system/data/common/create_custom_learn_video_dataset.py
@@ -65,9 +65,9 @@ def main(args):
):
temp_df = df[df["# 1: Detection or Track-id"] == str(label)]
if temp_df.iloc[0]["10-11+: Repeated Species"] not in label_dict.keys():
- label_dict[
- temp_df.iloc[0]["10-11+: Repeated Species"]
- ] = label_counter
+ label_dict[temp_df.iloc[0]["10-11+: Repeated Species"]] = (
+ label_counter
+ )
label_counter += 1
min_frame = pd.to_numeric(temp_df["3: Unique Frame Identifier"]).min()
max_frame = pd.to_numeric(temp_df["3: Unique Frame Identifier"]).max()
diff --git a/angel_system/data/common/kwcoco_utils.py b/angel_system/data/common/kwcoco_utils.py
index 47fac3564..8446c26fe 100644
--- a/angel_system/data/common/kwcoco_utils.py
+++ b/angel_system/data/common/kwcoco_utils.py
@@ -26,8 +26,8 @@
from angel_system.data.common.load_data import (
activities_from_dive_csv,
objs_as_dataframe,
- time_from_name,
sanitize_str,
+ time_from_name,
)
from angel_system.data.common.load_data import Re_order
@@ -49,7 +49,7 @@ def load_kwcoco(dset):
return dset
-def add_activity_gt_to_kwcoco(task, dset):
+def add_activity_gt_to_kwcoco(topic, task, dset, activity_config_fn):
"""Takes an existing kwcoco file and fills in the "activity_gt"
field on each image based on the activity annotations.
@@ -61,9 +61,18 @@ def add_activity_gt_to_kwcoco(task, dset):
# Load kwcoco file
dset = load_kwcoco(dset)
- data_dir = f"/data/PTG/{task}/"
+ data_dir = f"/data/PTG/{topic}/"
activity_gt_dir = f"{data_dir}/activity_anns"
+ # Load activity config
+ with open(activity_config_fn, "r") as stream:
+ activity_config = yaml.safe_load(stream)
+ activity_labels = activity_config["labels"]
+ label_version = activity_config["version"]
+
+ activity_gt_dir = f"{activity_gt_dir}/{task}_labels/"
+
+ # Add ground truth to kwcoco
for video_id in dset.index.videos.keys():
video = dset.index.videos[video_id]
video_name = video["name"]
@@ -71,18 +80,11 @@ def add_activity_gt_to_kwcoco(task, dset):
if "_extracted" in video_name:
video_name = video_name.split("_extracted")[0]
- video_skill = "m2" # video["recipe"]
- with open(
- f"../config/activity_labels/{task}/task_{video_skill}.yaml", "r"
- ) as stream:
- recipe_activity_config = yaml.safe_load(stream)
- recipe_activity_labels = recipe_activity_config["labels"]
-
- recipe_activity_gt_dir = f"{activity_gt_dir}/{video_skill}_labels/"
-
- activity_gt_fn = f"{recipe_activity_gt_dir}/{video_name}_activity_labels_v2.csv"
- gt = activities_from_dive_csv(activity_gt_fn)
+ activity_gt_fn = (
+ f"{activity_gt_dir}/{video_name}_activity_labels_v{label_version}.csv"
+ )
+ gt = activities_from_dive_csv(topic, activity_gt_fn)
gt = objs_as_dataframe(gt)
image_ids = dset.index.vidid_to_gids[video_id]
@@ -90,9 +92,14 @@ def add_activity_gt_to_kwcoco(task, dset):
# Update the activity gt for each image
for gid in sorted(image_ids):
im = dset.imgs[gid]
- frame_idx, time = time_from_name(im["file_name"])
+ frame_idx, time = time_from_name(im["file_name"], topic)
- matching_gt = gt.loc[(gt["start"] <= time) & (gt["end"] >= time)]
+ if time:
+ matching_gt = gt.loc[(gt["start"] <= time) & (gt["end"] >= time)]
+ else:
+ matching_gt = gt.loc[
+ (gt["start_frame"] <= frame_idx) & (gt["end_frame"] >= frame_idx)
+ ]
if matching_gt.empty:
label = "background"
@@ -105,9 +112,7 @@ def add_activity_gt_to_kwcoco(task, dset):
try:
activity = [
- x
- for x in recipe_activity_labels
- if int(x["id"]) == int(float(label))
+ x for x in activity_labels if int(x["id"]) == int(float(label))
]
except:
activity = []
@@ -131,6 +136,7 @@ def add_activity_gt_to_kwcoco(task, dset):
# dset.fpath = dset.fpath.split(".")[0] + "_fixed.mscoco.json"
dset.dump(dset.fpath, newlines=True)
+ return dset
def visualize_kwcoco_by_label(dset=None, save_dir=""):
@@ -163,9 +169,6 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""):
fn = im["file_name"].split("/")[-1]
gt = im.get("activity_gt", "")
- if not gt:
- gt = ""
- # act_pred = im.get("activity_pred", "")
fig, ax = plt.subplots()
# title = f"GT: {gt}, PRED: {act_pred}"
@@ -179,13 +182,12 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""):
aids = gid_to_aids[gid]
anns = ub.dict_subset(dset.anns, aids)
- using_contact = False
+
for aid, ann in anns.items():
conf = ann.get("confidence", 1)
# if conf < 0.1:
# continue
- x, y, w, h = ann["bbox"] # xywh
cat_id = ann["category_id"]
cat = dset.cats[cat_id]["name"]
@@ -193,6 +195,8 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""):
color = colors[obj_labels.index(cat)]
+ # bbox
+ x, y, w, h = ann["bbox"] # xywh
rect = patches.Rectangle(
(x, y),
w,
@@ -204,6 +208,44 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""):
)
ax.add_patch(rect)
+
+ # keypoints
+ if "keypoints" in ann.keys():
+ kp_connections = {
+ "nose": ["mouth"],
+ "mouth": ["throat"],
+ "throat": ["chest", "left_upper_arm", "right_upper_arm"],
+ "chest": ["back"],
+ "left_upper_arm": ["left_lower_arm"],
+ "left_lower_arm": ["left_wrist"],
+ "left_wrist": ["left_hand"],
+ "right_upper_arm": ["right_lower_arm"],
+ "right_lower_arm": ["right_wrist"],
+ "right_wrist": ["right_hand"],
+ "back": ["left_upper_leg", "right_upper_leg"],
+ "left_upper_leg": ["left_knee"],
+ "left_knee": ["left_lower_leg"],
+ "left_lower_leg": ["left_foot"],
+ "right_upper_leg": ["right_knee"],
+ "right_knee": ["right_lower_leg"],
+ "right_lower_leg": ["right_foot"],
+ }
+ kps = {}
+
+ for kp in ann["keypoints"]:
+ kps[kp["keypoint_category"]] = kp["xy"]
+
+ for kp_cat, connects in kp_connections.items():
+ for connect in connects:
+ pt1 = kps[kp_cat]
+ pt2 = kps[connect]
+ ax.plot(
+ [pt1[0], pt2[0]],
+ [pt1[1], pt2[1]],
+ color=color,
+ marker="o",
+ )
+
ax.annotate(label, (x, y), color="black", annotation_clip=False)
video_dir = (
@@ -221,12 +263,12 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""):
plt.close("all")
-def imgs_to_video(imgs_dir):
+def imgs_to_video(imgs_dir, topic):
"""Convert directory of images to a video"""
video_name = imgs_dir.split("/")[-1] + ".avi"
images = glob.glob(f"{imgs_dir}/*.png")
- images = sorted(images, key=lambda x: time_from_name(x)[0])
+ images = sorted(images, key=lambda x: time_from_name(x, topic)[0])
frame = cv2.imread(images[0])
height, width, layers = frame.shape
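
A hypothetical call against the updated `add_activity_gt_to_kwcoco` signature, using the medical/r18 values that appear elsewhere in this patch (paths are placeholders):

```python
from angel_system.data.common.kwcoco_utils import add_activity_gt_to_kwcoco

dset = add_activity_gt_to_kwcoco(
    topic="medical",
    task="r18",
    dset="r18_train.mscoco.json",  # placeholder path to an existing kwcoco file
    activity_config_fn="config/activity_labels/medical/r18.yaml",
)
# The activity config supplies the label set and the "version" used to locate
# "<video_name>_activity_labels_v<version>.csv" ground-truth files.
```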
diff --git a/angel_system/data/common/load_data.py b/angel_system/data/common/load_data.py
index 122d0f31a..d1801a133 100644
--- a/angel_system/data/common/load_data.py
+++ b/angel_system/data/common/load_data.py
@@ -30,6 +30,15 @@ def sanitize_str(str_: str):
return str_.lower().strip(" .")
+def time_from_name(fname, topic="cooking"):
+ if topic == "medical":
+ from angel_system.data.medical.load_bbn_data import time_from_name as tfn
+ elif topic == "cooking":
+ from angel_system.data.cooking.load_kitware_data import time_from_name as tfn
+
+ return tfn(fname)
+
+
def Re_order(image_list, image_number):
img_id_list = []
for img in image_list:
@@ -44,33 +53,6 @@ def Re_order(image_list, image_number):
return new_list
-RE_FILENAME_TIME = re.compile(
- r"frame_(?P\d+)_(?P\d+(?:_|.)\d+).(?P\w+)"
-)
-
-
-def time_from_name(fname):
- """
- Extract the float timestamp from the filename.
-
- :param fname: Filename of an image in the format
- frame_<frame>_<ts>.<ext>
-
- :return: timestamp (float) in seconds
- """
- fname = os.path.basename(fname)
- match = RE_FILENAME_TIME.match(fname)
- time = match.group("ts")
- if "_" in time:
- time = time.split("_")
- time = float(time[0]) + (float(time[1]) * 1e-9)
- elif "." in time:
- time = float(time)
-
- frame = match.group("frame")
- return int(frame), time
-
-
def load_from_file(
gt_fn, detections_fn
) -> Tuple[List[str], pd.DataFrame, pd.DataFrame]:
@@ -138,7 +120,7 @@ def load_from_file(
return labels, gt, detections
-def activities_from_dive_csv(filepath: str) -> List[Activity]:
+def activities_from_dive_csv(topic, filepath: str) -> List[Activity]:
"""
Load from a DIVE output CSV file a sequence of ground truth activity
annotations.
@@ -149,6 +131,11 @@ def activities_from_dive_csv(filepath: str) -> List[Activity]:
:param filepath: Filesystem path to the CSV file.
:return: List of loaded activity annotations.
"""
+ if topic == "medical":
+ from angel_system.data.medical.load_bbn_data import time_from_name
+ elif topic == "cooking":
+ from angel_system.data.cooking.load_kitware_data import time_from_name
+
print(f"Loading ground truth activities from: {filepath}")
df = pd.read_csv(filepath)
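
A quick sketch of how the new topic-dispatching `time_from_name` behaves (filenames below are illustrative; the medical parser currently returns `None` for the timestamp, so downstream code falls back to frame-index matching):

```python
from angel_system.data.common.load_data import time_from_name

frame_idx, ts = time_from_name("frame_000123_1697059987_123456789.png", topic="cooking")
# frame_idx == 123, ts == 1697059987.123456789 (seconds + nanoseconds * 1e-9)

frame_idx, ts = time_from_name("M2-10_000150.png", topic="medical")
# frame_idx == 150, ts is None
```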
diff --git a/angel_system/data/cooking/load_kitware_data.py b/angel_system/data/cooking/load_kitware_data.py
index 5c6d5d99b..e57363545 100644
--- a/angel_system/data/cooking/load_kitware_data.py
+++ b/angel_system/data/cooking/load_kitware_data.py
@@ -1,13 +1,41 @@
+import os
import kwcoco
import glob
import warnings
-
+import re
import ubelt as ub
from angel_system.data.common.load_data import Re_order
from angel_system.data.common.kwcoco_utils import load_kwcoco
+RE_FILENAME_TIME = re.compile(
+ r"frame_(?P\d+)_(?P\d+(?:_|.)\d+).(?P\w+)"
+)
+
+
+def time_from_name(fname):
+ """
+ Extract the float timestamp from the filename.
+
+ :param fname: Filename of an image in the format
+ frame_<frame>_<ts>.<ext>
+
+ :return: timestamp (float) in seconds
+ """
+ fname = os.path.basename(fname)
+ match = RE_FILENAME_TIME.match(fname)
+ time = match.group("ts")
+ if "_" in time:
+ time = time.split("_")
+ time = float(time[0]) + (float(time[1]) * 1e-9)
+ elif "." in time:
+ time = float(time)
+
+ frame = match.group("frame")
+ return int(frame), time
+
+
def object_label_fixes(obj_cat):
# Fix some deprecated labels
if obj_cat in ["timer", "timer (20)", "timer (30)", "timer (else)"]:
diff --git a/angel_system/data/medical/load_bbn_data.py b/angel_system/data/medical/load_bbn_data.py
index 5c08123bb..9103068b8 100644
--- a/angel_system/data/medical/load_bbn_data.py
+++ b/angel_system/data/medical/load_bbn_data.py
@@ -4,6 +4,7 @@
This should be run on videos not used during training.
"""
+
import os
import re
import glob
@@ -11,6 +12,7 @@
import kwcoco
import kwimage
import shutil
+import warnings
import pandas as pd
import numpy as np
@@ -21,6 +23,26 @@
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3"
+RE_FILENAME_TIME = re.compile(r"(?P<task>\w+)-(?P<vid>\d+)_(?P<frame>\d+).(?P<ext>\w+)")
+
+
+def time_from_name(fname):
+ """
+ Extract the float timestamp from the filename.
+
+ :param fname: Filename of an image in the format
+ <task>-<vid>_<frame>.<ext>
+
+ :return: timestamp (float) in seconds
+ """
+ fname = os.path.basename(fname)
+ match = RE_FILENAME_TIME.match(fname)
+
+ frame = match.group("frame")
+ time = None
+ return int(frame), time
+
+
def dive_to_activity_file(videos_dir):
"""DIVE CSV to BBN TXT frame-level annotation file format"""
for dive_csv in glob.glob(f"{videos_dir}/*/*.csv"):
@@ -207,42 +229,65 @@ def save_as_kwcoco(classes, data, save_fn="bbn-data.mscoco.json"):
dset.dump(dset.fpath, newlines=True)
-def activity_label_fixes(activity_label, target):
- if activity_label == "put_tourniquet_around":
- label = "place-tourniquet"
- label_id = 1
- if activity_label == "pulls_tight":
- label = "pull-tight"
- label_id = 2
- if activity_label == "secures" and target == "velcro_strap":
- label = "apply-strap-to-strap-body"
- label_id = 3
- if activity_label == "twist" and target == "windlass":
- label = "turn-windless"
- label_id = 4
- if (
- activity_label == "locks_into_windlass_keeper"
- or activity_label == "lock_into_windlass_keeper"
- ):
- label = "lock-windless"
- label_id = 5
- if (
- activity_label == "wraps_remaining_strap_around"
- or activity_label == "wrap_remaining_strap_around"
- ):
- label = "pull-remaining-strap"
- label_id = 6
- if activity_label == "secures" and target == "windlass":
- label = "secure-strap"
- label_id = 7
- if activity_label == "writes_on" and target == "tourniquet_label":
- label = "mark-time"
- label_id = 8
+def activity_label_fixes(task, activity_label, target):
+ # print(activity_label, target)
+ if task == "m2":
+ if activity_label == "put_tourniquet_around":
+ label = "place-tourniquet"
+ label_id = 1
+ if activity_label == "pulls_tight":
+ label = "pull-tight"
+ label_id = 2
+ if activity_label == "secures" and target == "velcro_strap":
+ label = "apply-strap-to-strap-body"
+ label_id = 3
+ if activity_label == "twist" and target == "windlass":
+ label = "turn-windless"
+ label_id = 4
+ if (
+ activity_label == "locks_into_windlass_keeper"
+ or activity_label == "lock_into_windlass_keeper"
+ ):
+ label = "lock-windless"
+ label_id = 5
+ if (
+ activity_label == "wraps_remaining_strap_around"
+ or activity_label == "wrap_remaining_strap_around"
+ ):
+ label = "pull-remaining-strap"
+ label_id = 6
+ if activity_label == "secures" and target == "windlass":
+ label = "secure-strap"
+ label_id = 7
+ if activity_label == "writes_on" and target == "tourniquet_label":
+ label = "mark-time"
+ label_id = 8
+ elif task == "r18":
+ if activity_label == "apply_pressure_to" and target == "casualty_wound":
+ label = "cover-seal-wound"
+ label_id = 1
+ if (
+ activity_label == "grabs"
+ or activity_label == "opens"
+ or activity_label == "removes"
+ or activity_label == "discard"
+ ) and (target == "hyfin_package" or target == "gauze"):
+ label = "open-pack"
+ label_id = 2
+ if activity_label == "wipes_gauze_on":
+ label = "clean-wound-site"
+ label_id = 3
+ if activity_label == "removes" and target == "chest_seal_backing":
+ label = "peel-seal-backer"
+ label_id = 4
+ if activity_label == "apply" and target == "chest_seal":
+ label = "place-seal"
+ label_id = 5
return label, label_id
-def bbn_activity_txt_to_csv(root_dir, output_dir):
+def bbn_activity_txt_to_csv(task, root_dir, output_dir, label_version):
"""
Generate DIVE csv format activity annotations from BBN's text annotations
@@ -263,21 +308,22 @@ def bbn_activity_txt_to_csv(root_dir, output_dir):
# Lab videos
action_fns = glob.glob(f"{root_dir}/*/*_skills_frame.txt")
if not action_fns:
- warnings.warn(f"No text annotations found in {root_dir}")
+ warnings.warn(f"No text annotations found in {root_dir} subfolders")
return
- for action_txt_fn in action_fns:
+ for action_txt_fn in sorted(action_fns):
track_id = 0
video_dir = os.path.dirname(action_txt_fn)
video_name = os.path.basename(video_dir)
if video_name in KNOWN_BAD_VIDEOS:
continue
- action_f = open(action_txt_fn)
- lines = action_f.readlines()
+ print(action_txt_fn)
+ with open(action_txt_fn) as action_f:
+ lines = action_f.readlines()
# Create output csv
- csv_fn = f"{output_dir}/{video_name}_activity_labels_v2.csv"
+ csv_fn = f"{output_dir}/{video_name}_activity_labels_v{label_version}.csv"
csv_f = open(csv_fn, "w")
csv_f.write(
"# 1: Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes\n"
@@ -292,10 +338,10 @@ def bbn_activity_txt_to_csv(root_dir, output_dir):
end_frame = int(data[1])
start_frame_fn = os.path.basename(
- glob.glob(f"{video_dir}/images/frame_{start_frame}_*.png")[0]
+ glob.glob(f"{video_dir}/images/*_{start_frame}.png")[0]
)
end_frame_fn = os.path.basename(
- glob.glob(f"{video_dir}/images/frame_{end_frame}_*.png")[0]
+ glob.glob(f"{video_dir}/images/*_{end_frame}.png")[0]
)
# Determine activity
@@ -307,7 +353,7 @@ def bbn_activity_txt_to_csv(root_dir, output_dir):
# convert activity_str info to our activity labels
# this is hacky: fix later
label = None
- label, label_id = activity_label_fixes(activity_label, target)
+ label, label_id = activity_label_fixes(task, activity, target)
if label is not None:
line1 = f"{track_id},{start_frame_fn},{start_frame},1,1,2,2,1,-1,{label_id},1"
@@ -318,7 +364,6 @@ def bbn_activity_txt_to_csv(root_dir, output_dir):
csv_f.write(f"{line2}\n")
track_id += 1
- action_f.close()
csv_f.close()
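
For concreteness, the task-aware mapping above behaves like this (example calls only; inputs are taken from the branches shown):

```python
from angel_system.data.medical.load_bbn_data import activity_label_fixes

label, label_id = activity_label_fixes("m2", "put_tourniquet_around", "tourniquet")
# -> ("place-tourniquet", 1)
label, label_id = activity_label_fixes("r18", "apply", "chest_seal")
# -> ("place-seal", 5)
# Inputs that hit none of the branches leave label/label_id unassigned, so
# callers should only pass activities covered by the task's mapping.
```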
diff --git a/angel_system/global_step_prediction/global_step_predictor.py b/angel_system/global_step_prediction/global_step_predictor.py
index 5841879bb..24063bf95 100644
--- a/angel_system/global_step_prediction/global_step_predictor.py
+++ b/angel_system/global_step_prediction/global_step_predictor.py
@@ -235,9 +235,9 @@ def initialize_new_recipe_tracker(self, recipe, config_fn=None):
},
)
- tracker_dict[
- "last_granular_step_per_broad_step"
- ] = self.get_last_granular_step_per_broad_step(broad_steps)
+ tracker_dict["last_granular_step_per_broad_step"] = (
+ self.get_last_granular_step_per_broad_step(broad_steps)
+ )
tracker_dict["recipe"] = recipe
tracker_dict["current_broad_step"] = 0
@@ -254,9 +254,9 @@ def initialize_new_recipe_tracker(self, recipe, config_fn=None):
tracker_dict["broad_step_to_activity_ids"] = [
self.get_unique(step["activity_ids"]) for step in broad_steps
]
- tracker_dict[
- "granular_step_to_activity_id"
- ] = self.get_activity_per_granular_step(broad_steps)
+ tracker_dict["granular_step_to_activity_id"] = (
+ self.get_activity_per_granular_step(broad_steps)
+ )
# Labels
tracker_dict["broad_step_to_label"] = [step["label"] for step in broad_steps]
@@ -300,9 +300,9 @@ def increment_granular_step(self, tracker_ind):
if current_granular_step < num_granular_steps:
self.trackers[tracker_ind]["current_granular_step"] += 1
- self.trackers[tracker_ind][
- "current_broad_step"
- ] = self.granular_to_broad_step(tracker, current_granular_step)
+ self.trackers[tracker_ind]["current_broad_step"] = (
+ self.granular_to_broad_step(tracker, current_granular_step)
+ )
elif current_granular_step == num_granular_steps and tracker["active"] == True:
self.trackers[tracker_ind]["active"] = False
else:
@@ -335,9 +335,9 @@ def decrement_granular_step(self, tracker_ind):
if current_granular_step > 0:
self.trackers[tracker_ind]["current_granular_step"] -= 1
- self.trackers[tracker_ind][
- "current_broad_step"
- ] = self.granular_to_broad_step(tracker, current_granular_step)
+ self.trackers[tracker_ind]["current_broad_step"] = (
+ self.granular_to_broad_step(tracker, current_granular_step)
+ )
else:
raise Exception(
f"Tried to decrement tracker #{tracker_ind}: "
diff --git a/angel_system/global_step_prediction/r18_only_experiment.py b/angel_system/global_step_prediction/r18_only_experiment.py
index b8e69a34b..52da35ee9 100644
--- a/angel_system/global_step_prediction/r18_only_experiment.py
+++ b/angel_system/global_step_prediction/r18_only_experiment.py
@@ -7,7 +7,8 @@
from sklearn.metrics import confusion_matrix
import scipy.ndimage as ndi
-from angel_system.global_step_prediction.global_step_predictor import (
+# from angel_system.global_step_prediction.global_step_predictor import (
+from global_step_predictor import (
GlobalStepPredictor,
)
@@ -23,9 +24,10 @@ def run_inference_all_vids(
step_predictor = GlobalStepPredictor(
recipe_types=["r18"],
- activity_config_fpath="/home/local/KHQ/peri.akiva/projects/angel_system/config/activity_labels/r18.yaml",
+ activity_config_fpath="/home/local/KHQ/cameron.johnson/code/tmp_hannah_code/angel_system/config/activity_labels/medical/r18.yaml",
+ # activity_config_fpath="/data/PTG/medical/training/activity_classifier/TCN_HPL/logs/r18_pro_data_top_1_objs_feat_v6_NEW_ORDER_win_25/runs/2024-05-08_12-05-20/test_activity_preds.mscoco.json",
recipe_config_dict={
- "r18": "/home/local/KHQ/peri.akiva/projects/angel_system/config/tasks/r18.yaml"
+ "r18": "/home/local/KHQ/cameron.johnson/code/tmp_hannah_code/angel_system/config/tasks/medical/r18.yaml"
},
# threshold_multiplier=0.3,
# threshold_frame_count=2
@@ -36,7 +38,7 @@ def run_inference_all_vids(
else:
avg_probs = step_predictor.compute_average_TP_activations(coco_train)
np.save(
- "/home/local/KHQ/peri.akiva/projects/angel_system/model_files/global_step_predictor_act_avgs_all_classes.npy",
+ "/home/local/KHQ/cameron.johnson/code/tmp_hannah_code/angel_system/model_files/global_step_predictor_act_avgs_all_classes.npy",
avg_probs,
)
print(f"average_probs = {avg_probs}")
@@ -135,11 +137,11 @@ def get_unique(activity_ids):
if __name__ == "__main__":
coco_train = kwcoco.CocoDataset(
- "/home/local/KHQ/peri.akiva/projects/angel_system/model_files/coco/r18_test_activity_preds.mscoco.json"
+ "/data/PTG/medical/training/activity_classifier/TCN_HPL/logs/r18_pro_data_top_1_objs_feat_v6_NEW_ORDER_win_25/runs/2024-05-08_12-05-20/test_activity_preds.mscoco.json"
)
# Same file for now since I don't have another.
coco_test = kwcoco.CocoDataset(
- "/home/local/KHQ/peri.akiva/projects/angel_system/model_files/coco/r18_test_activity_preds.mscoco.json"
+ "/data/PTG/medical/training/activity_classifier/TCN_HPL/logs/r18_pro_data_top_1_objs_feat_v6_NEW_ORDER_win_25/runs/2024-05-08_12-05-20/test_activity_preds.mscoco.json"
)
recipe_config = {"r18": "config/tasks/medical/r18.yaml"}
diff --git a/ansible/roles/provision-files/vars/main.yml b/ansible/roles/provision-files/vars/main.yml
index 246c199ab..c52857443 100644
--- a/ansible/roles/provision-files/vars/main.yml
+++ b/ansible/roles/provision-files/vars/main.yml
@@ -122,16 +122,16 @@ girder_file_downloads:
sha512: 7183385f8eaca85997725a107a76034de2bd4a59c1434b4bdb7c1ac8931cf4b68a53f6e736734643386364b9f0856de795a14965b6a02bc5eb5891252e6a73c9
dest: "{{ stage_dirs.object_detector }}/r18_det.pt"
# Activity classifier
- - file_id: 6605dfc78b763ca20ae99f8c
- sha512: 0091b751e046f7816061ab11e99ffc709dd5506453e711407a7a5925d168f29ce9fd2a3d97eeeb6f08008aa62d76c7a99279024d27ce42cbd9544a590c528960
+ - file_id: 663e7294687336214e7cdc07
+ sha512: b0166110745c39bb14447ddde34f789fcf7808dc8443a5dc1371de484e995e4fc24b426d1b884321ac51f63c88560ccbbed3a09be5006afa69445ec8d9e04151
dest: "{{ stage_dirs.activity_classifier }}/r18_tcn.ckpt"
- file_id: 6606b6e9aa5c8de3874c3f4a
sha512: 3c84333390ee6b12327bb7e5debed37149c7f95cc437b16939f77b599d1a0b3b8c4f0995820b95973170b94df695494000a1c45fbf46632267c212e125fe58a3
dest: "{{ stage_dirs.activity_classifier }}/r18_mapping.txt"
# Global Step predictor model
- - file_id: 660ebb1caa5c8de3874c43c9
- sha512: 20c168a220626aaa07192c234366cbc4e998314430632e18b7ea7eb4b1bff491bcd22f131a67ba7713ade9efb49d43ce37010fdc398890deaff13c6022373667
- dest: "{{ stage_dirs.task_monitor }}/r18_test_activity_preds.mscoco.json"
+ - file_id: 663e4c5d687336214e7cdbff
+ sha512: 12b5bf950f64d9609182f795f7243edb6438b3919d920f0f527127660bb632abd5917ed149c28aa5ebcef7e7f6d8ed6edff39b27286eb0ce37a1ff7de2da4c77
+ dest: "{{ stage_dirs.task_monitor }}/global_step_predictor_act_avgs_R18.npy"
# List of git repositories to check out at a specific ref and then archive.
# Destination files will be written as GZipped TAR files, so please suffix
diff --git a/config/object_labels/medical/r18.yaml b/config/object_labels/medical/r18.yaml
index 1016122e4..d0a561e79 100644
--- a/config/object_labels/medical/r18.yaml
+++ b/config/object_labels/medical/r18.yaml
@@ -1,4 +1,4 @@
-version: "1.0"
+version: "1"
title: "R18"
labels:
# Item:
diff --git a/python-tpl/TCN_HPL b/python-tpl/TCN_HPL
index abb162229..33a8c9236 160000
--- a/python-tpl/TCN_HPL
+++ b/python-tpl/TCN_HPL
@@ -1 +1 @@
-Subproject commit abb16222925e7c1c6adba97dce458ae3bd0fdd62
+Subproject commit 33a8c92365d147a5ccb8878661eeadefd7600d14
diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py
index 185866cbd..dc374b375 100644
--- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py
+++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py
@@ -3,6 +3,7 @@
Use get_hydra_config to get cfg dict, use eval.py content as how-to-call example using
trainer.predict(model=model, dataloaders=dataloaders, ckpt_path=cfg.ckpt_path)
"""
+
import json
from heapq import heappush, heappop
from pathlib import Path
@@ -53,6 +54,10 @@
PARAM_MODEL_WEIGHTS = "model_weights"
# Filesystem path to the class mapping file.
PARAM_MODEL_MAPPING = "model_mapping"
+# Bool flag to indicate if the NormalizePixelPts augmentation should be applied
+PARAM_MODEL_NORMALIZE_PIXEL_PTS = "model_normalize_pixel_pts"
+# Bool flag to indicate if the NormalizeFromCenter augmentation should be applied
+PARAM_MODEL_NORMALIZE_CENTER_PTS = "model_normalize_center_pts"
# Filesystem path to the input object detection label mapping.
# This is expected to be a JSON file containing a list of strings.
PARAM_MODEL_OD_MAPPING = "model_det_label_mapping"
@@ -88,6 +93,8 @@
PARAM_POSE_TOPIC = "pose_topic"
+PARAM_TOPIC = "topic"
+
class NoActivityClassification(Exception):
"""
@@ -115,10 +122,12 @@ def __init__(self):
(PARAM_ACT_TOPIC,),
(PARAM_MODEL_WEIGHTS,),
(PARAM_MODEL_MAPPING,),
+ (PARAM_MODEL_NORMALIZE_PIXEL_PTS, False),
+ (PARAM_MODEL_NORMALIZE_CENTER_PTS, False),
(PARAM_MODEL_OD_MAPPING,),
(PARAM_MODEL_DEVICE, "cuda"),
(PARAM_MODEL_DETS_CONV_VERSION, 6),
- (PARAM_WINDOW_FRAME_SIZE, 45),
+ (PARAM_WINDOW_FRAME_SIZE, 25),
(PARAM_BUFFER_MAX_SIZE_SECONDS, 15),
(PARAM_IMAGE_PIX_WIDTH, 1280),
(PARAM_IMAGE_PIX_HEIGHT, 720),
@@ -126,6 +135,7 @@ def __init__(self):
(PARAM_OUTPUT_COCO_FILEPATH, ""),
(PARAM_INPUT_COCO_FILEPATH, ""),
(PARAM_TIME_TRACE_LOGGING, True),
+ (PARAM_TOPIC, "medical"),
],
)
self._img_ts_topic = param_values[PARAM_IMG_TS_TOPIC]
@@ -138,6 +148,10 @@ def __init__(self):
self._img_pix_height = param_values[PARAM_IMAGE_PIX_HEIGHT]
self._enable_trace_logging = param_values[PARAM_TIME_TRACE_LOGGING]
+ self.model_normalize_pixel_pts = param_values[PARAM_MODEL_NORMALIZE_PIXEL_PTS]
+ self.model_normalize_center_pts = param_values[PARAM_MODEL_NORMALIZE_CENTER_PTS]
+
+ self.topic = param_values[PARAM_TOPIC]
# Load in TCN classification model and weights
with SimpleTimer("Loading inference module", log.info):
self._model_device = torch.device(param_values[PARAM_MODEL_DEVICE])
@@ -145,6 +159,7 @@ def __init__(self):
param_values[PARAM_MODEL_WEIGHTS],
param_values[PARAM_MODEL_MAPPING],
self._model_device,
+ topic=self.topic,
).eval()
# from pytorch_lightning.utilities.model_summary import summarize
# from torchsummary import summary
@@ -155,7 +170,9 @@ def __init__(self):
print(f"json path: {param_values[PARAM_MODEL_OD_MAPPING]}")
with open(param_values[PARAM_MODEL_OD_MAPPING]) as infile:
det_label_list = json.load(infile)
- self._det_label_to_id = {c: i for i, c in enumerate(det_label_list)}
+ self._det_label_to_id = {
+ c: i for i, c in enumerate(det_label_list) if c not in ["patient", "user"]
+ }
print(self._det_label_to_id)
# Feature version aligned with model current architecture
self._feat_version = param_values[PARAM_MODEL_DETS_CONV_VERSION]
@@ -388,9 +405,9 @@ def _thread_populate_from_coco(self, input_coco_path: Path) -> None:
# Creates [n_det, n_label] matrix, which we assign to and then
# ravel into the message slot.
conf_mat = np.zeros((n_dets, len(obj_labels)), dtype=np.float64)
- conf_mat[
- np.arange(n_dets), image_annots.get("category_id")
- ] = image_annots.get("confidence")
+ conf_mat[np.arange(n_dets), image_annots.get("category_id")] = (
+ image_annots.get("confidence")
+ )
det_msg.label_confidences.extend(conf_mat.ravel())
# Calling the image callback last since image frames define the
@@ -724,6 +741,8 @@ def _process_window(self, window: InputWindow) -> ActivityDetection:
image_width=self._img_pix_width,
image_height=self._img_pix_height,
feature_memo=memo_object_to_feats,
+ normalize_pixel_pts=self.model_normalize_pixel_pts,
+ normalize_center_pts=self.model_normalize_center_pts,
)
# except ValueError:
# # feature detections were all None
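
One detail worth noting in the detection-label mapping change above: filtering inside the comprehension drops the excluded classes but keeps every remaining label's original index. A small illustration (the label list is hypothetical):

```python
det_label_list = ["patient", "user", "tourniquet", "windlass"]  # illustrative list
det_label_to_id = {
    c: i for i, c in enumerate(det_label_list) if c not in ["patient", "user"]
}
print(det_label_to_id)  # {'tourniquet': 2, 'windlass': 3}
```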
diff --git a/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py b/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py
index 28da1663d..a77c28a71 100644
--- a/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py
+++ b/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py
@@ -6,6 +6,7 @@
This node is currently only compatible with the `global_step_predictor` task
monitoring node due to leveraging specific implementation/output semantics.
"""
+
import csv
import math
from pathlib import Path
diff --git a/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py b/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py
index b8b9c4851..0207ebf71 100644
--- a/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py
+++ b/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py
@@ -89,7 +89,7 @@ def __init__(self):
self._inference_img_size,
)
log.info(
- f"Loaded model with classes:\n"
+ f"Loaded object model with classes:\n"
+ "\n".join(f'\t- "{n}"' for n in self.object_model.names)
)
@@ -112,7 +112,12 @@ def __init__(self):
callback_group=MutuallyExclusiveCallbackGroup(),
)
+ # Hand model
self.hand_model = YOLOv8(self._hand_model_chpt_fp)
+ log.info(
+ f"Loaded hand model with classes:\n"
+ + "\n".join(f'\t- "{n}"' for n in self.hand_model.names)
+ )
if not self._no_trace:
self.object_model = TracedModel(
diff --git a/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py b/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py
index 4b184db33..5ab3e50d7 100644
--- a/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py
+++ b/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py
@@ -131,11 +131,9 @@ def __init__(self):
activity_config_fpath=self._activity_config_file,
)
- # model_file = coco file with confidence predictions
- coco = kwcoco.CocoDataset(self._model_file)
- avg_probs = self.gsp.compute_average_TP_activations(coco)
- # self.gsp.get_average_TP_activations_from_file(self._model_file)
- # log.info("Global state predictor loaded")
+ # model_file = pre-computed averages of TP activations
+ self.gsp.get_average_TP_activations_from_file(self._model_file)
+ log.info("Global state predictor loaded")
# Mapping from recipe to current step. Used to track state changes
# of the GSP and determine when to publish a TaskUpdate msg.
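
With this change the node expects `model_file` to point at a pre-computed `.npy` of average true-positive activations rather than a COCO prediction file. A minimal sketch of producing that file offline, following the pattern in `r18_only_experiment.py` above (paths are illustrative):

```python
import kwcoco
import numpy as np
from angel_system.global_step_prediction.global_step_predictor import GlobalStepPredictor

step_predictor = GlobalStepPredictor(
    recipe_types=["r18"],
    activity_config_fpath="config/activity_labels/medical/r18.yaml",
    recipe_config_dict={"r18": "config/tasks/medical/r18.yaml"},
)
# Average per-class activation over true-positive windows, saved for the ROS node.
avg_probs = step_predictor.compute_average_TP_activations(
    kwcoco.CocoDataset("test_activity_preds.mscoco.json")
)
np.save("global_step_predictor_act_avgs_R18.npy", avg_probs)
```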
diff --git a/ros/angel_utils/multi_task_demo_ui/main.html b/ros/angel_utils/multi_task_demo_ui/main.html
index 5018f6fb4..1e1aaa081 100644
--- a/ros/angel_utils/multi_task_demo_ui/main.html
+++ b/ros/angel_utils/multi_task_demo_ui/main.html
@@ -15,7 +15,7 @@
diff --git a/ros/angel_utils/python/angel_utils/conversion.py b/ros/angel_utils/python/angel_utils/conversion.py
index a67d7f96d..8cf662c5c 100644
--- a/ros/angel_utils/python/angel_utils/conversion.py
+++ b/ros/angel_utils/python/angel_utils/conversion.py
@@ -1,6 +1,7 @@
"""
Various conversion functions into and out of angel_msg types.
"""
+
import array
import itertools
import math
diff --git a/ros/angel_utils/scripts/bag_extractor.py b/ros/angel_utils/scripts/bag_extractor.py
index 8e10ac97a..25947496e 100644
--- a/ros/angel_utils/scripts/bag_extractor.py
+++ b/ros/angel_utils/scripts/bag_extractor.py
@@ -153,19 +153,19 @@ def __init__(self):
if self.extract_head_pose_data or self.extract_depth_head_pose_data:
self.msg_type_to_handler_map[HeadsetPoseData] = self.handle_head_pose_msg
if self.extract_hand_pose_data:
- self.msg_type_to_handler_map[
- HandJointPosesUpdate
- ] = self.handle_hand_pose_msg
+ self.msg_type_to_handler_map[HandJointPosesUpdate] = (
+ self.handle_hand_pose_msg
+ )
if self.extract_spatial_map_data:
self.msg_type_to_handler_map[SpatialMesh] = self.handle_spatial_mesh_msg
if self.extract_annotation_event_data:
- self.msg_type_to_handler_map[
- AnnotationEvent
- ] = self.handle_annotation_event_msg
+ self.msg_type_to_handler_map[AnnotationEvent] = (
+ self.handle_annotation_event_msg
+ )
if self.extract_activity_detection_data:
- self.msg_type_to_handler_map[
- ActivityDetection
- ] = self.handle_activity_detection_msg
+ self.msg_type_to_handler_map[ActivityDetection] = (
+ self.handle_activity_detection_msg
+ )
if self.extract_task_update_data:
self.msg_type_to_handler_map[TaskUpdate] = self.handle_task_update_msg
diff --git a/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py b/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py
index 481b57119..1341c085b 100644
--- a/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py
+++ b/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py
@@ -3,7 +3,7 @@
from tcn_hpl.data.components.augmentations import NormalizePixelPts
from angel_system.activity_classification.utils import (
- obj_det2d_set_to_feature_by_method,
+ obj_det2d_set_to_feature,
)
@@ -165,14 +165,14 @@ def test_hand_distance_and_norm(self):
"water jug (open)": 5,
"water jug lid": 4,
},
- # v5 variation
- use_activation=True,
- use_hand_dist=True,
- use_intersection=True,
+ version=5,
+ top_n_objects=1,
)
- feature_vec = obj_det2d_set_to_feature_by_method(**test_input)
+ feature_vec = obj_det2d_set_to_feature(**test_input)
+ print("feature_vec: ", feature_vec)
feature_vec_normalized = feature_vec.copy()
+ print("feature_vec_normalized: ", feature_vec_normalized)
norm(feature_vec_normalized[None, ...])
# Where normalization happened, nothing should be out of the [0, 1]
diff --git a/tmux/demos/medical/Kitware-R18.yml b/tmux/demos/medical/Kitware-R18.yml
index 3209fdd80..98d178283 100644
--- a/tmux/demos/medical/Kitware-R18.yml
+++ b/tmux/demos/medical/Kitware-R18.yml
@@ -98,7 +98,7 @@ windows:
-p task_error_topic:=TaskErrors
-p system_command_topic:=SystemCommands
-p det_topic:=activity_topic
- -p model_file:=${MODEL_DIR}/task_monitor/r18_test_activity_preds.mscoco.json
+ -p model_file:=${MODEL_DIR}/task_monitor/global_step_predictor_act_avgs_R18.npy
-p thresh_frame_count:=3
-p deactivate_thresh_frame_count:=10
-p threshold_multiplier_weak:=0.00