diff --git a/angel-docker-build.sh b/angel-docker-build.sh index 43018d243..8cc62d380 100755 --- a/angel-docker-build.sh +++ b/angel-docker-build.sh @@ -4,6 +4,7 @@ # set -e SCRIPT_DIR="$(cd "$(dirname "${0}")" && pwd)" +export DOCKER_BUILDKIT=1 # source common functionalities . "${SCRIPT_DIR}/scripts/common.bash" diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 1830d9049..e25763277 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -14,7 +14,7 @@ import numpy.typing as npt import torch -from tcn_hpl.data.components.augmentations import NormalizePixelPts +from tcn_hpl.data.components.augmentations import NormalizePixelPts, NormalizeFromCenter from tcn_hpl.models.ptg_module import PTGLitModule from angel_system.activity_classification.utils import ( @@ -23,12 +23,15 @@ ) -def load_module(checkpoint_file, label_mapping_file, torch_device) -> PTGLitModule: +def load_module( + checkpoint_file, label_mapping_file, torch_device, topic +) -> PTGLitModule: """ :param checkpoint_file: :param label_mapping_file: :param torch_device: + :param topic: :return: """ # # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility @@ -45,6 +48,7 @@ def load_module(checkpoint_file, label_mapping_file, torch_device) -> PTGLitModu # HParam overrides data_dir=mapping_file_dir, mapping_file_name=mapping_file_name, + topic=topic, ) # print(f"CLASSES IN MODEL: {model.classes}") @@ -91,9 +95,12 @@ class PatientPose: def normalize_detection_features( det_feats: npt.ArrayLike, feat_version: int, + top_k_objects: int, img_width: int, img_height: int, num_det_classes: int, + normalize_pixel_pts: bool, + normalize_center_pts: bool, ) -> None: """ Normalize input object detection descriptor vectors, outputting new vectors @@ -108,9 +115,16 @@ def normalize_detection_features( :return: Normalized object detection features. """ - # This method is known to normalize in-place. - # Shape [window_size, n_feats] - NormalizePixelPts(img_width, img_height, num_det_classes, feat_version)(det_feats) + if normalize_pixel_pts: + # This method is known to normalize in-place. 
+ # Shape [window_size, n_feats] + NormalizePixelPts( + img_width, img_height, num_det_classes, feat_version, top_k_objects + )(det_feats) + if normalize_center_pts: + NormalizeFromCenter( + img_width, img_height, num_det_classes, feat_version, top_k_objects + )(det_feats) def objects_to_feats( @@ -121,7 +135,9 @@ def objects_to_feats( image_width: int, image_height: int, feature_memo: Optional[Dict[int, npt.NDArray]] = None, - top_n_objects: int = 3, + top_k_objects: int = 1, + normalize_pixel_pts=False, + normalize_center_pts=False, ) -> Tuple[torch.Tensor, torch.Tensor]: """ Convert some object detections for some window of frames into a feature @@ -160,128 +176,64 @@ def objects_to_feats( feature_dtype = None # hands-joints offset vectors - zero_offset = [0 for i in range(22)] - joint_left_hand_offset_all_frames = [None] * window_size - joint_right_hand_offset_all_frames = [None] * window_size - joint_object_offset_all_frames = [None] * window_size + zero_joint_offset = [0 for i in range(22)] + # for pose in frame_patient_poses: - for i, (pose, detection) in enumerate( + for i, (pose, detections) in enumerate( zip(frame_patient_poses, frame_object_detections) ): - if detection is None: + pose_keypoints = [] + print(pose) + if detections is None: continue - labels = detection.labels - bx, by, bw, bh = tlbr_to_xywh( - detection.top, - detection.left, - detection.bottom, - detection.right, - ) - # iterate over all detections in that frame - joint_object_offset = [] - for j, label in enumerate(labels): - if label == "hand (right)" or label == "hand (left)": - x, y, w, h = bx[j], by[j], bw[j], bh[j] - - cx, cy = x + (w // 2), y + (h // 2) - hand_point = np.array((cx, cy)) - - offset_vector = [] - if pose is not None: - for joint in pose: - jx, jy = joint.positions.x, joint.positions.y - joint_point = np.array((jx, jy)) - dist = np.linalg.norm(joint_point - hand_point) - offset_vector.append(dist) - else: - offset_vector = zero_offset - - if label == "hand (left)": - joint_left_hand_offset_all_frames[i] = offset_vector - elif label == "hand (right)": - joint_right_hand_offset_all_frames[i] = offset_vector - else: - # if objects_joints and num_objects > 0: - x, y, w, h = bx[j], by[j], bw[j], bh[j] - cx, cy = x + (w // 2), y + (h // 2) - object_point = np.array((cx, cy)) - offset_vector = [] - if pose is not None: - for joint in pose: - jx, jy = joint.positions.x, joint.positions.y - joint_point = np.array((jx, jy)) - dist = np.linalg.norm(joint_point - object_point) - offset_vector.append(dist) - else: - offset_vector = zero_offset - joint_object_offset.append(offset_vector) - - joint_object_offset_all_frames[i] = joint_object_offset - - for i, frame_dets in enumerate(frame_object_detections): - frame_dets: ObjectDetectionsLTRB - if frame_dets is not None: - f_id = frame_dets.id - if f_id not in feat_memo: - # the input message has tlbr, but obj_det2d_set_to_feature - # requires xywh. 
- xs, ys, ws, hs = tlbr_to_xywh( - frame_dets.top, - frame_dets.left, - frame_dets.bottom, - frame_dets.right, - ) - feat = obj_det2d_set_to_feature( - frame_dets.labels, + detection_id = detections.id + confidences = detections.confidences + if detection_id in feat_memo.keys(): + # We've already processed this set + feat = feat_memo[detection_id] + else: + labels = detections.labels + xs, ys, ws, hs = tlbr_to_xywh( + detections.top, + detections.left, + detections.bottom, + detections.right, + ) + + if pose is not None: + for joint in pose: + kwcoco_format_joint = { + "xy": [joint.positions.x, joint.positions.y], + "keypoint_category_id": -1, # TODO: not in message + "keypoint_category": joint.labels, + } + pose_keypoints.append(kwcoco_format_joint) + + feat = ( + obj_det2d_set_to_feature( + labels, xs, ys, ws, hs, - frame_dets.confidences, - None, - None, - None, - None, - None, - label_to_ind=det_label_to_idx, + confidences, + pose_keypoints=( + pose_keypoints if pose_keypoints else zero_joint_offset + ), + obj_label_to_ind=det_label_to_idx, version=feat_version, - top_n_objects=top_n_objects, + top_k_objects=top_k_objects, ) + .ravel() + .astype(np.float32) + ) - offset_vector = [] - - if joint_left_hand_offset_all_frames[i] is not None: - offset_vector.extend(joint_left_hand_offset_all_frames[i]) - else: - offset_vector.extend(zero_offset) - - if joint_right_hand_offset_all_frames[i] is not None: - offset_vector.extend(joint_right_hand_offset_all_frames[i]) - else: - offset_vector.extend(zero_offset) - - for j in range(top_n_objects): - if joint_object_offset_all_frames[i] is not None: - if len(joint_object_offset_all_frames[i]) > j: - offset_vector.extend(joint_object_offset_all_frames[i][j]) - else: - offset_vector.extend(zero_offset) - else: - offset_vector.extend(zero_offset) - - feat.extend(offset_vector) - feat = np.array(feat, dtype=np.float64).ravel() - feat_memo[f_id] = feat - - print(f"feat: {feat}") - print(f"feat shape: {feat.shape}") + feat_memo[detection_id] = feat - else: - feat = feat_memo[f_id] - feature_ndim = feat.shape - feature_dtype = feat.dtype - feature_list[i] = feat + feature_ndim = feat.shape + feature_dtype = feat.dtype + feature_list[i] = feat # Already checked that we should have non-zero frames with detections above # so feature_ndim/_dtype should not be None at this stage assert feature_ndim is not None @@ -306,9 +258,17 @@ def objects_to_feats( # Normalize features # Shape [window_size, n_feats] - normalize_detection_features( - feature_vec, feat_version, image_width, image_height, len(det_label_to_idx) - ) + if normalize_pixel_pts or normalize_center_pts: + normalize_detection_features( + feature_vec, + feat_version, + top_k_objects, + image_width, + image_height, + len(det_label_to_idx), + normalize_pixel_pts, + normalize_center_pts, + ) return feature_vec, mask diff --git a/angel_system/activity_classification/train_activity_classifier.py b/angel_system/activity_classification/train_activity_classifier.py index 288dd94c7..e60de931f 100644 --- a/angel_system/activity_classification/train_activity_classifier.py +++ b/angel_system/activity_classification/train_activity_classifier.py @@ -34,8 +34,8 @@ def data_loader( - inv_act_map: Activity id to label string dict - image_activity_gt: Image id to activity label string dict - image_id_to_dataset: Image id to id in ``dset`` dict - - label_to_ind: Object detection labels to ids dict - - act_id_to_str: Object detection ids to labels dict + - obj_label_to_ind: Object detection labels to ids dict + - 
obj_ind_to_label: Object detection ids to labels dict - ann_by_image: Image id to annotation dict """ print("Loading data....") @@ -80,14 +80,11 @@ def data_loader( min_cat = min([dset.cats[i]["id"] for i in dset.cats]) num_act = len(dset.cats) - label_to_ind = { + obj_label_to_ind = { dset.cats[i]["name"]: dset.cats[i]["id"] - min_cat for i in dset.cats } - print( - f"Object label mapping:\n\t" - f"{json.dumps([o['name'] for o in dset.categories().objs])}" - ) - act_id_to_str = {dset.cats[i]["id"]: dset.cats[i]["name"] for i in dset.cats} + print(f"Object label mapping:\n\t", obj_label_to_ind) + obj_ind_to_label = {dset.cats[i]["id"]: dset.cats[i]["name"] for i in dset.cats} ann_by_image = {} for gid, anns in dset.index.gid_to_aids.items(): @@ -101,8 +98,8 @@ def data_loader( inv_act_map, image_activity_gt, image_id_to_dataset, - label_to_ind, - act_id_to_str, + obj_label_to_ind, + obj_ind_to_label, ann_by_image, ) @@ -111,24 +108,23 @@ def compute_feats( act_map: dict, image_activity_gt: dict, image_id_to_dataset: dict, - label_to_ind: dict, - act_id_to_str: dict, + obj_label_to_ind: dict, + obj_ind_to_label: dict, ann_by_image: dict, feat_version=1, - objects_joints: bool = False, - hands_joints: bool = False, - aug_trans_range=None, - aug_rot_range=None, - top_n_objects=3, + top_k_objects=1, ) -> Tuple[np.ndarray, np.ndarray]: """Compute features from object detections :param act_map: Activity label string to id :param image_activity_gt: Image id to activity label string dict :param image_id_to_dataset: Image id to id in ``dset`` dict - :param label_to_ind: Object detection labels to ids dict - :param act_id_to_str: Object detection ids to labels dict + :param obj_label_to_ind: Object detection labels to ids dict + :param obj_ind_to_label: Object detection ids to labels dict :param ann_by_image: Image id to annotation dict + :param feat_version: + Version of the feature conversion approach. 
+ :param top_k_objects: Number top confidence objects to use per label, defaults to 1 :return: resulting feature data and its labels """ @@ -137,18 +133,7 @@ def compute_feats( Y = [] dataset_id = [] last_dset = 0 - - hands_possible_labels = ["hand (right)", "hand (left)", "hand", "hands"] - non_objects_labels = ["patient", "user"] - hands_inds = [ - key for key, label in act_id_to_str.items() if label in hands_possible_labels - ] - non_object_inds = [ - key for key, label in act_id_to_str.items() if label in non_objects_labels - ] - object_inds = list( - set(list(label_to_ind.values())) - set(hands_inds) - set(non_object_inds) - ) + zero_joint_offset = [0 for i in range(22)] for image_id in sorted(list(ann_by_image.keys())): label_vec = [] @@ -157,173 +142,39 @@ def compute_feats( ws = [] hs = [] label_confidences = [] - obj_obj_contact_state = [] - obj_obj_contact_conf = [] - obj_hand_contact_state = [] - obj_hand_contact_conf = [] + pose_keypoints = [] - if objects_joints or hands_joints: - joint_left_hand_offset = [] - joint_right_hand_offset = [] - joint_object_offset = [] + # Reorganize detections into lists + if len(ann_by_image[image_id]) == 0: + continue + pose_keypoints = zero_joint_offset + for ann in ann_by_image[image_id]: + cat = obj_ind_to_label[ann["category_id"]] - num_hands, num_objects = 0, 0 + # Ignore the patient and user bboxes, use the pose from the patient + if cat in ["patient", "user"]: + if cat == "patient": + pose_keypoints = ann["keypoints"] + continue - for ann in ann_by_image[image_id]: - if "keypoints" in ann.keys(): - pose_keypoints = ann["keypoints"] - - elif "confidence" in ann.keys(): - label_vec.append(act_id_to_str[ann["category_id"]]) - x, y = ann["bbox"][0], ann["bbox"][1] - w, h = ann["bbox"][2], ann["bbox"][3] - - if aug_trans_range != None and aug_rot_range != None: - - print(f"performing augmentation") - random_translation_x = np.random.uniform( - aug_trans_range[0], aug_trans_range[1] - ) - random_translation_y = np.random.uniform( - aug_trans_range[0], aug_trans_range[1] - ) - random_rotation = np.random.uniform( - aug_rot_range[0], aug_rot_range[1] - ) - - object_center_x, object_center_y = x + w // 2, y + h // 2 - - rotation_matrix = np.array( - [ - [ - np.cos(random_rotation), - -np.sin(random_rotation), - random_translation_x, - ], - [ - np.sin(random_rotation), - np.cos(random_rotation), - random_translation_y, - ], - [0, 0, 1], - ] - ) - - xy = np.array([x, y, 1]) - xy_center = np.array([object_center_x, object_center_y, 1]) - - rot_xy = (xy - xy_center) @ rotation_matrix.T + xy_center - - x = rot_xy[0] - y = rot_xy[1] - - xs.append(x) - ys.append(y) - ws.append(w) - hs.append(h) - label_confidences.append(ann["confidence"]) - - if ann["category_id"] in hands_inds: - num_hands += 1 - elif ann["category_id"] in object_inds: - num_objects += 1 - try: - obj_obj_contact_state.append(ann["obj-obj_contact_state"]) - obj_obj_contact_conf.append(ann["obj-obj_contact_conf"]) - obj_hand_contact_state.append(ann["obj-hand_contact_state"]) - obj_hand_contact_conf.append(ann["obj-hand_contact_conf"]) - except KeyError: - pass - - # hardcoded width? 
- image_center = 1280 // 2 - if num_hands > 0: - hands_loc_dict = {} - for i, label in enumerate(label_vec): - if label == "hand": - hand_center = xs[i] + ws[i] // 2 - if hand_center < image_center: - if "hand (left)" not in hands_loc_dict.keys(): - label_vec[i] = "hand (left)" - hands_loc_dict[label_vec[i]] = (hand_center, i) - else: - if hand_center > hands_loc_dict["hand (left)"][0]: - label_vec[i] = "hand (right)" - hands_loc_dict[label_vec[i]] = (hand_center, i) - else: - prev_index = hands_loc_dict["hand (left)"][1] - label_vec[prev_index] = "hand (right)" - label_vec[i] = "hand (left)" - else: - if "hand (right)" not in hands_loc_dict.keys(): - label_vec[i] = "hand (right)" - hands_loc_dict[label_vec[i]] = (hand_center, i) - else: - if hand_center < hands_loc_dict["hand (right)"][0]: - label_vec[i] = "hand (left)" - hands_loc_dict[label_vec[i]] = (hand_center, i) - else: - prev_index = hands_loc_dict["hand (right)"][1] - label_vec[prev_index] = "hand (left)" - label_vec[i] = "hand (right)" - - if "hand" in label_to_ind.keys(): - label_to_ind_tmp = {} - for key, value in label_to_ind.items(): - if key == "hand": - label_to_ind_tmp["hand (left)"] = value - label_to_ind_tmp["hand (right)"] = value + 1 - elif key in non_objects_labels: - continue - else: - label_to_ind_tmp[key] = value + 1 - - label_to_ind = label_to_ind_tmp - - zero_offset = [0 for i in range(22)] - if (num_hands > 0 or num_objects > 0) and (hands_joints or objects_joints): - joint_object_offset = [] - for i, label in enumerate(label_vec): - - if hands_joints and num_hands > 0: - - if label == "hand (right)" or label == "hand (left)": - bx, by, bw, bh = xs[i], ys[i], ws[i], hs[i] - hcx, hcy = bx + (bw // 2), by + (bh // 2) - hand_point = np.array((hcx, hcy)) - - offset_vector = [] - if "pose_keypoints" in locals(): - for joint in pose_keypoints: - jx, jy = joint["xy"] - joint_point = np.array((jx, jy)) - dist = np.linalg.norm(joint_point - hand_point) - offset_vector.append(dist) - else: - offset_vector = zero_offset - - if label == "hand (left)": - joint_left_hand_offset = offset_vector - elif label == "hand (right)": - joint_right_hand_offset = offset_vector - - else: - if objects_joints and num_objects > 0: - bx, by, bw, bh = xs[i], ys[i], ws[i], hs[i] - ocx, ocy = bx + (bw // 2), by + (bh // 2) - object_point = np.array((ocx, ocy)) - offset_vector = [] - if "pose_keypoints" in locals(): - for joint in pose_keypoints: - jx, jy = joint["xy"] - joint_point = np.array((jx, jy)) - dist = np.linalg.norm(joint_point - object_point) - offset_vector.append(dist) - else: - offset_vector = zero_offset - - joint_object_offset.append(offset_vector) + label_vec.append(cat) + + x, y, w, h = ann["bbox"] + xs.append(x) + ys.append(y) + ws.append(w) + hs.append(h) + + label_confidences.append(ann["confidence"]) + # Ignore the patient and user labels in the feature vector + only_obj_label_to_ind = { + k: i + for i, (k, v) in enumerate(obj_label_to_ind.items()) + if k not in ["patient", "user"] + } + + # Compute feature vector feature_vec = obj_det2d_set_to_feature( label_vec, xs, @@ -331,42 +182,12 @@ def compute_feats( ws, hs, label_confidences, - None, - obj_obj_contact_state, - obj_obj_contact_conf, - obj_hand_contact_state, - obj_hand_contact_conf, - label_to_ind, + pose_keypoints, + only_obj_label_to_ind, version=feat_version, - top_n_objects=top_n_objects, + top_k_objects=top_k_objects, ) - if objects_joints or hands_joints: - zero_offset = [0 for i in range(22)] - offset_vector = [] - if hands_joints: - - if 
len(joint_left_hand_offset) >= 1: - offset_vector.extend(joint_left_hand_offset) - else: - offset_vector.extend(zero_offset) - - if len(joint_right_hand_offset) >= 1: - offset_vector.extend(joint_right_hand_offset) - else: - offset_vector.extend(zero_offset) - if objects_joints: - - for i in range(top_n_objects): - if len(joint_object_offset) > i: - offset_vector.extend(joint_object_offset[i]) - else: - offset_vector.extend(zero_offset) - - feature_vec.extend(offset_vector) - - feature_vec = np.array(feature_vec, dtype=np.float64) - X.append(feature_vec.ravel()) try: @@ -477,19 +298,19 @@ def validate( def save( output_dir: Union[str, PosixPath], act_str_list: List[str], - label_to_ind: dict, + obj_label_to_ind: dict, clf: RandomForestClassifier, ): """Save the model to a pickle file :param output_dir: Path to save the model to :param act_str_list: List of activity label strings - :param label_to_ind: Object detection labels to ids dict + :param obj_label_to_ind: Object detection labels to ids dict :param clf: model """ output_fn = f"{output_dir}/activity_weights.pkl" with open(output_fn, "wb") as of: - pickle.dump([label_to_ind, 1, clf, act_str_list], of) + pickle.dump([obj_label_to_ind, 1, clf, act_str_list], of) print(f"Saved weights to {output_fn}") @@ -509,16 +330,16 @@ def train_activity_classifier(args: argparse.Namespace): inv_act_map, image_activity_gt, image_id_to_dataset, - label_to_ind, - act_id_to_str, + obj_label_to_ind, + obj_ind_to_label, ann_by_image, ) = data_loader(args.train_fn, act_labels) X, y = compute_feats( act_map, image_activity_gt, image_id_to_dataset, - label_to_ind, - act_id_to_str, + obj_label_to_ind, + obj_ind_to_label, ann_by_image, ) plot_dataset_counts(X, y, args.output_dir, "train") @@ -529,16 +350,16 @@ def train_activity_classifier(args: argparse.Namespace): val_inv_act_map, val_image_activity_gt, val_image_id_to_dataset, - val_label_to_ind, - val_act_id_to_str, + val_obj_label_to_ind, + val_obj_ind_to_label, val_ann_by_image, ) = data_loader(args.val_fn, act_labels) X_final_test, y_final_test = compute_feats( val_act_map, val_image_activity_gt, val_image_id_to_dataset, - val_label_to_ind, - val_act_id_to_str, + val_obj_label_to_ind, + val_obj_ind_to_label, val_ann_by_image, ) plot_dataset_counts(X_final_test, y_final_test, args.output_dir, "val") @@ -549,7 +370,7 @@ def train_activity_classifier(args: argparse.Namespace): # Save act_str_list = [inv_act_map[key] for key in sorted(list(set(y)))] - save(args.output_dir, act_str_list, label_to_ind, clf) + save(args.output_dir, act_str_list, obj_label_to_ind, clf) def main(): diff --git a/angel_system/activity_classification/utils.py b/angel_system/activity_classification/utils.py index d2ba9419d..eb845ba48 100644 --- a/angel_system/activity_classification/utils.py +++ b/angel_system/activity_classification/utils.py @@ -1,13 +1,31 @@ -from typing import Dict -from typing import Tuple +import os + +from typing import Dict, Tuple, List import kwimage +import random import numpy as np import numpy.typing as npt import matplotlib.pyplot as plt -import matplotlib.patches as patches +import matplotlib.colors as mcolors + from PIL import Image +from pathlib import Path + + +######################### +# Default values +######################### +default_dist = (0, 0) # (1280 * 2, 720 * 2) +default_center_dist = (0, 0) # (1280, 720) +default_bbox = [0, 0, 0, 0] # [0, 0, 1280, 720] +default_center = ([[0]], [[0]]) # kwimage.Boxes([default_bbox], "xywh").center +default_center_list = [default_center[0][0][0], 
default_center[1][0][0]] +zero_joint_offset = [0 for i in range(22)] + +random_colors = list(mcolors.CSS4_COLORS.keys()) +random.shuffle(random_colors) def tlbr_to_xywh( @@ -26,6 +44,7 @@ def tlbr_to_xywh( :param right: Array-like of right box coordinate values. :return: + List of x values, List of y values, List of width values, List of height values """ assert ( len(top) == len(left) == len(bottom) == len(right) @@ -37,246 +56,739 @@ def tlbr_to_xywh( return xs, ys, ws, hs -def obj_det2d_set_to_feature( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - descriptors, - obj_obj_contact_state, - obj_obj_contact_conf, - obj_hand_contact_state, - obj_hand_contact_conf, - label_to_ind: Dict[str, int], - version: int = 1, - top_n_objects=3, -): - """Convert ObjectDetection2dSet fields into a feature vector. +def feature_version_to_options(feature_version: int) -> Dict[str, bool]: + """Convert the feature version number to a dict of + boolean flags indicating which data values should be added to the feature vector - :param label_to_ind: - Dictionary mapping a label str and returns the index within the feature vector. + :param feature_version: Version of the feature conversion approach. - :param version: - Version of the feature conversion approach. + :return: + Dictionary of flag names and boolean values that match the input parameters + to the functions that create/utilize the feature vector """ - if version == 1: - """ - Feature vector that encodes the activation feature of each class - - Len: 42 - - [A[obj1] ... A[objN]] - """ - feature_vec = obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - label_to_ind, - use_activation=True, - ) + options = {} - elif version == 2: - """ - Feature vector that encodes the distance of each object from each hand, - and the activation features + """ + Feature vector that encodes the activation feature of each class - Len: 204 + Len: top_k_objects * num_obj_classes - [ + [ + for k_obj in top_k_object: + A[obj1] ... A[objN] + ] + """ + options[1] = {"use_activation": True} + + """ + Feature vector that encodes the distance of each object from each hand, + and the activation features + + Len: + top_k_objects * ( + 1 + (num_obj_classes-2)*2 + 1 + (num_obj_classes-2)*2 + 2 + (num_obj_classes-2) + ) + + [ + for k_obj in top_k_object: A[right hand], - D[right hand, obj1]x, D[right hand, obj1]y, ... , D[right hand, objN]y, + D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y, A[left hand], - D[left hand, obj1]x, D[left hand, obj1]y, ... , D[left hand, objN]y, + D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y, D[right hand, left hand]x, D[right hand, left hand]y, - A[obj1] ... A[objN] - ] - """ - feature_vec = obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - label_to_ind, - use_activation=True, - use_hand_dist=True, - ) + A[obj1_k] ... 
A[objN_k] + ] + """ + options[2] = { + "use_activation": True, + "use_hand_dist": True, + } - elif version == 3: - """ - Feature vector that encodes the distance of each object to the center of the frame, - the intersection of each object to the hands, - and the activation features + """ + Feature vector that encodes the distance of each object to the center of the frame, + the intersection of each object to the hands, + and the activation features - Len: 207 + Len: + top_k_objects * ( + 1 + 2 + 1 + 2 + 1 + (1 + 1 + 1 + 2) * (num_obj_classes-2) + ) - [ + [ + for k_obj in top_k_object: A[right hand], D[right hand, center]x, D[right hand, center]y, A[left hand], D[left hand, center]x, D[left hand, center]y, - I[right hand, left hand] - A[obj1], - I[right hand, obj1], - I[left hand, obj1] - D[obj1, center]x, D[obj1, center]y - ] - """ - feature_vec = obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - label_to_ind, - use_activation=True, - use_center_dist=True, - use_intersection=True, - ) + I[right hand, left hand], + A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], D[obj1_k, center]x, D[obj1_k, center]y ... , D[objN_k, center]y + ] + """ + options[3] = { + "use_activation": True, + "use_center_dist": True, + "use_intersection": True, + } - elif version == 5: - """ - Feature vector that encodes the distance of each object from each hand, - the intersection of each object to the hands, - and the activation features + """ + Feature vector that encodes the distance of each object from each hand, + the intersection of each object to the hands, + and the activation features - Len: 1 + ((N-2)*2) + 1 + ((N-2)*2) + 2 + 1 + (3 * (N-2)), where N is the number of object classes + Len: + top_k_objects * ( + 1 + 2 * (num_obj_classes-2) + 1 + 2 * (num_obj_classes-2) + 2 + 1 + (1 + 1 + 1) * (num_obj_classes-2) + ) + + [ + for k_obj in top_k_object: + A[right hand], + D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y, + A[left hand], + D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y, + D[right hand, left hand]x, D[right hand, left hand]y, + I[right hand, left hand], + A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], ... , I[left hand, objN_k] + ] + """ + options[5] = { + "use_activation": True, + "use_hand_dist": True, + "use_intersection": True, + } - [ + """ + Feature vector that encodes the distance of each object from each hand, + the intersection of each object to the hands, + the distance from the center of the hands to each patient joint, + and the distance from the center of each object to each patient joint, + and the activation features + + Len: + top_k_objects * ( + (1 + (num_obj_classes-2)*2) * 2 + 2 + 1 + + (num_obj_classes-2) * (1+1+1) + ) + + 22*2 + 22*2 + + top_k_objects * ((22*2)*(num_obj_classes-2)) + + + [ + for k_obj in top_k_object: A[right hand], - D[right hand, obj1]x, D[right hand, obj1]y, ... , D[right hand, objN]y, + D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y, A[left hand], - D[left hand, obj1]x, D[left hand, obj1]y, ... , D[left hand, objN]y, + D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y, D[right hand, left hand]x, D[right hand, left hand]y, - I[right hand, left hand] - A[obj1] I[right hand, obj1] I[left hand, obj1], ... 
, I[left hand, objN] - ] - """ - feature_vec = obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - label_to_ind, - use_activation=True, - use_hand_dist=True, - use_intersection=True, - ) - elif version == 6: - feature_vec = obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - label_to_ind, - use_activation=True, - use_hand_dist=True, - use_intersection=True, - use_joint_hand_offset=True, - use_joint_object_offset=True, - top_n_objects=top_n_objects, - ) - else: - raise NotImplementedError(f"Unhandled version '{version}'") + I[right hand, left hand], + A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], ... , I[left hand, objN_k], + D[left hand, joint1]x, ... , D[left hand, joint 22]y, + D[right hand, joint1]x, ... , D[right hand, joint 22]y, + for k_obj in top_k_object: + D[obj1_k, joint1]x, ... , D[obj1_k, joint22]y, + ..., + D[objN_k, joint1]x, ... , D[objN_k, joint22]y + ] + """ + options[6] = { + "use_activation": True, + "use_hand_dist": True, + "use_intersection": True, + "use_joint_hand_offset": True, + "use_joint_object_offset": True, + } + + return options[feature_version] + + +def obj_det2d_set_to_feature( + label_vec: List[str], + xs: List[float], + ys: List[float], + ws: List[float], + hs: List[float], + label_confidences: List[float], + pose_keypoints: List[Dict], + obj_label_to_ind: Dict[str, int], + version: int = 1, + top_k_objects: int = 1, +): + """Convert ObjectDetection2dSet fields into a feature vector. + + :param label_vec: List of object labels for each detection (length: # detections) + :param xs: List of x values for each detection (length: # detections) + :param ys: List of y values for each detection (length: # detections) + :param ws: List of width values for each detection (length: # detections) + :param hs: List of height values for each detection (length: # detections) + :param label_confidences: List of confidence values for each detection (length: # detections) + :param pose_keypoints: + List of joints, represented by a dictionary contining the x and y corrdinates of the points and the category id and string + :param obj_label_to_ind: + Dictionary mapping a label str and returns the index within the feature vector. + :param version: + Version of the feature conversion approach. 
+ :param top_k_objects: Number top confidence objects to use per label, defaults to 1 + + :return: resulting feature data + """ + opts = feature_version_to_options(version) + feature_vec = obj_det2d_set_to_feature_by_method( + label_vec, + xs, + ys, + ws, + hs, + label_confidences, + pose_keypoints, + obj_label_to_ind, + top_k_objects=top_k_objects, + **opts, + ) # print(f"feat {feature_vec}") # print(len(feature_vec)) return feature_vec -def obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - label_to_ind: Dict[str, int], - use_activation=False, - use_hand_dist=False, - use_center_dist=False, - use_intersection=False, - use_joint_hand_offset=False, - use_joint_object_offset=False, - top_n_objects=3, +def plot_feature_vec( + image_fn: str, + right_hand_center: list, + left_hand_center: list, + feature_vec: np.array, + obj_label_to_ind: Dict[str, int], + output_dir: str, + top_k_objects: int = 1, + use_activation: bool = False, + use_hand_dist: bool = False, + use_center_dist: bool = False, + use_intersection: bool = False, + use_joint_hand_offset: bool = False, + use_joint_object_offset: bool = False, + joint_names: List[str] = [ + "nose", + "mouth", + "throat", + "chest", + "stomach", + "left_upper_arm", + "right_upper_arm", + "left_lower_arm", + "right_lower_arm", + "left_wrist", + "right_wrist", + "left_hand", + "right_hand", + "left_upper_leg", + "right_upper_leg", + "left_knee", + "right_knee", + "left_lower_leg", + "right_lower_leg", + "left_foot", + "right_foot", + "back", + ], + colors: List[str] = [ + "yellow", + "red", + "green", + "lightblue", + "blue", + "purple", + "orange", + ], ): + """Plot the object and joint points based on the hand bbox centers and the distance values + in the feature vector + + :param image_fn: Path to the image to draw on + :param right_hand_center: List of the x and y coordinates of the right hand box center + :param left_hand_center: List of the x and y coordinates of the left hand box center + :param feature_vec: Numpy array of values determined by the provided flags + :param obj_label_to_ind: + Dictionary mapping a label str and returns the index within the feature vector. + :param output_dir: Path to a folder to save the generated images to + :param top_k_objects: Number top confidence objects to use per label, defaults to 1 + :param use_activation: If True, add the confidence values of the detections to the feature vector, defaults to False + :param use_hand_dist: If True, add the distance of the detection centers to both hand centers to the feature vector, defaults to False + :param use_intersection: If True, add the intersection of the detection boxes with the hand boxes to the feature vector, defaults to False + :param use_joint_hand_offset: If True, add the distance of the hand centers to the patient joints to the feature vector, defaults to False + :param use_joint_object_offset: If True, add the distance of the object centers to the patient joints to the feature vector, defaults to False + :param joint_names: List of the joint names + :param colors: List of colors to use when plotting points """ - `label_vec`, `xs`, `ys`, `ws`, hs` are to all be parallel in association - and describe the object detections to create an embedding from. - - :param label_vec: Object label of the most confident class for each - detection. - :param xs: Upper-left X coordinate for each detection. - :param ys: Upper-left Y coordinate for each detection. - :param ws: Pixel width for each detection. 
- :param hs: Pixel height for each detection. - :param label_confidences: Confidence value of the most confident class for - each detection. - :param label_to_ind: Mapping of detection class indices - :param use_activation: - :param use_hand_dist: - :param use_center_dist: - :param use_intersection: - - :return: Feature vector embedding of the input detections. + Path(output_dir).mkdir(parents=True, exist_ok=True) + + rh_joint_dists = [] + lh_joint_dists = [] + rh_dists_k = [[] for i in range(top_k_objects)] + lh_dists_k = [[] for i in range(top_k_objects)] + obj_confs_k = [[] for i in range(top_k_objects)] + obj_im_center_dists_k = [[] for i in range(top_k_objects)] + obj_joint_dists_k = [[] for i in range(top_k_objects)] + + non_object_labels = ["hand (left)", "hand (right)", "user", "patient"] + labels = sorted(obj_label_to_ind) + for non_obj_label in non_object_labels: + labels.remove(non_obj_label) + + ind = -1 + for object_k_index in range(top_k_objects): + # RIGHT HAND + if use_activation: + ind += 1 + right_hand_conf = feature_vec[ind] + + if use_hand_dist: + for obj_label in labels: + ind += 1 + obj_rh_dist_x = feature_vec[ind] + ind += 1 + obj_rh_dist_y = feature_vec[ind] + + rh_dists_k[object_k_index].append([obj_rh_dist_x, obj_rh_dist_y]) + + if use_center_dist: + ind += 1 + rh_im_center_dist_x = feature_vec[ind] + ind += 1 + rh_im_center_dist_y = feature_vec[ind] + + # LEFT HAND + if use_activation: + ind += 1 + left_hand_conf = feature_vec[ind] + + if use_hand_dist: + # Left hand distances + for obj_label in labels: + ind += 1 + obj_lh_dist_x = feature_vec[ind] + ind += 1 + obj_lh_dist_y = feature_vec[ind] + + lh_dists_k[object_k_index].append([obj_lh_dist_x, obj_lh_dist_y]) + + if use_center_dist: + ind += 1 + lh_im_center_dist_x = feature_vec[ind] + ind += 1 + lh_im_center_dist_y = feature_vec[ind] + + # Right - left hand + if use_hand_dist: + # Right - left hand distance + ind += 1 + rh_lh_dist_x = feature_vec[ind] + ind += 1 + rh_lh_dist_y = feature_vec[ind] + if use_intersection: + ind += 1 + lh_rh_intersect = feature_vec[ind] + + # OBJECTS + for obj_label in labels: + if use_activation: + # Object confidence + ind += 1 + obj_conf = feature_vec[ind] + + obj_confs_k[object_k_index].append(obj_conf) + + if use_intersection: + # obj - right hand intersection + ind += 1 + obj_rh_intersect = feature_vec[ind] + # obj - left hand intersection + ind += 1 + obj_lh_intersect = feature_vec[ind] + + if use_center_dist: + # image center - obj distances + ind += 1 + obj_im_center_dist_x = feature_vec[ind] + ind += 1 + obj_im_center_dist_y = feature_vec[ind] + + obj_im_center_dists_k[object_k_index].append( + [obj_im_center_dist_x, obj_im_center_dist_y] + ) + + # HANDS-JOINTS + if use_joint_hand_offset: + # left hand - joints distances + for i in range(22): + ind += 1 + lh_jointi_dist_x = feature_vec[ind] + ind += 1 + lh_jointi_dist_y = feature_vec[ind] + + lh_joint_dists.append([lh_jointi_dist_x, lh_jointi_dist_y]) + + # right hand - joints distances + for i in range(22): + ind += 1 + rh_jointi_dist_x = feature_vec[ind] + ind += 1 + rh_jointi_dist_y = feature_vec[ind] + + rh_joint_dists.append([rh_jointi_dist_x, rh_jointi_dist_y]) + + # OBJS-JOINTS + if use_joint_object_offset: + for object_k_index in range(top_k_objects): + # obj - joints distances + for obj_label in labels: + joints_dists = [] + for i in range(22): + ind += 1 + obj_jointi_dist_x = feature_vec[ind] + ind += 1 + obj_jointi_dist_y = feature_vec[ind] + + joints_dists.append([obj_jointi_dist_x, obj_jointi_dist_y]) + + 
obj_joint_dists_k[object_k_index].append(joints_dists) + + # Draw + fig, ( + (lh_dist_ax, rh_dist_ax), + (im_center_dist_ax, obj_joint_dist_ax), + (lh_joint_dist_ax, rh_joint_dist_ax), + ) = plt.subplots(3, 2, figsize=(15, 15)) + axes = [ + rh_dist_ax, + lh_dist_ax, + im_center_dist_ax, + obj_joint_dist_ax, + rh_joint_dist_ax, + lh_joint_dist_ax, + ] + flags = [ + use_hand_dist, + use_hand_dist, + use_center_dist, + use_joint_object_offset, + use_joint_hand_offset, + use_joint_hand_offset, + ] + + rh_dist_ax.set_title("Objects from distance to right hand") + lh_dist_ax.set_title("Objects from distance to left hand") + im_center_dist_ax.set_title("Objects from distance to image center") + obj_joint_dist_ax.set_title("Joints from distance to objects*") + rh_joint_dist_ax.set_title("Joints from distance to right hand") + lh_joint_dist_ax.set_title("Joints from distance to left hand") + + rh_dist_color = colors[2] + lh_dist_color = colors[3] + obj_im_center_dist_color = colors[4] + lh_joint_color = colors[5] + rh_joint_color = colors[6] + + image = Image.open(image_fn) + image = np.array(image) + + # Default values for each plot + for ax, flag in zip(axes, flags): + if not flag: + continue + + ax.imshow(image) + + ax.plot(right_hand_center[0], right_hand_center[1], color=colors[0], marker="o") + ax.annotate( + f"hand (right): {round(right_hand_conf, 2)}", + right_hand_center, + color="black", + annotation_clip=False, + ) + + ax.plot(left_hand_center[0], left_hand_center[1], color=colors[1], marker="o") + ax.annotate( + f"hand (left): {round(left_hand_conf, 2)}", + left_hand_center, + color="black", + annotation_clip=False, + ) + + def draw_points_by_distance(ax, distances, pt, color, labels, confs): + # Make sure the reference point exists + if pt == default_center_list: + return + + for i, dist in enumerate(distances): + # Make sure the object point exists + if dist == list(default_dist): + continue + + obj_pt = [pt[0] - dist[0], pt[1] - dist[1]] # pt - obj_pt = dist + + ax.plot([pt[0], obj_pt[0]], [pt[1], obj_pt[1]], color=color, marker="o") + ax.annotate( + f"{labels[i]}: {round(confs[i], 2)}", + obj_pt, + color="black", + annotation_clip=False, + ) + + if use_joint_hand_offset: + draw_points_by_distance( + rh_joint_dist_ax, + rh_joint_dists, + right_hand_center, + rh_joint_color, + joint_names, + [1] * len(joint_names), + ) + draw_points_by_distance( + lh_joint_dist_ax, + lh_joint_dists, + left_hand_center, + lh_joint_color, + joint_names, + [1] * len(joint_names), + ) + + if use_hand_dist: + rh_dist_ax.plot( + [right_hand_center[0], right_hand_center[0] - rh_lh_dist_x], + [right_hand_center[1], right_hand_center[1] - rh_lh_dist_y], + color=random_colors[0], + marker="o", + ) + + for object_k_index in range(top_k_objects): + if use_hand_dist: + draw_points_by_distance( + rh_dist_ax, + rh_dists_k[object_k_index], + right_hand_center, + rh_dist_color, + labels, + obj_confs_k[object_k_index], + ) + draw_points_by_distance( + lh_dist_ax, + lh_dists_k[object_k_index], + left_hand_center, + lh_dist_color, + labels, + obj_confs_k[object_k_index], + ) + + if use_center_dist: + image_center = [1280 // 2, 720 // 2] + im_center_dist_ax.plot(image_center, color=colors[1], marker="o") + im_center_dist_ax.annotate( + "image_center", image_center, color="black", annotation_clip=False + ) + draw_points_by_distance( + im_center_dist_ax, + obj_im_center_dists_k[object_k_index], + image_center, + obj_im_center_dist_color, + labels, + obj_confs_k[object_k_index], + ) + + if use_joint_object_offset: + + 
obj_pts = [] + if use_hand_dist: + if right_hand_center != default_center_list: + obj_pts = [ + ( + [ + right_hand_center[0] - rh_dist[0], + right_hand_center[1] - rh_dist[1], + ] + if rh_dist != list(default_dist) + else default_center_list + ) + for rh_dist in rh_dists_k[object_k_index] + ] + elif left_hand_center != default_center_list: + obj_pts = [ + ( + [ + left_hand_center[0] - lh_dist[0], + left_hand_center[1] - lh_dist[1], + ] + if lh_dist != list(default_dist) + else default_center_list + ) + for lh_dist in lh_dists_k[object_k_index] + ] + elif use_center_dist: + obj_pts = [ + ( + [ + image_center[0] - im_center_dist[0], + image_center[1] - im_center_dist[1], + ] + if im_center_dist != list(default_dist) + else default_center_list + ) + for im_center_dist in obj_im_center_dists_k[object_k_index] + ] + + if not obj_pts: + continue + + for i, obj_pt in enumerate(obj_pts): + if obj_pt == default_center_list: + continue + + obj_joint_color = random_colors[(object_k_index * len(obj_pt)) + i] + obj_joint_dist_ax.plot( + obj_pt[0], obj_pt[1], color=obj_joint_color, marker="o" + ) + obj_joint_dist_ax.annotate( + f"{labels[i]}: {round(obj_confs_k[object_k_index][i], 2)}", + obj_pt, + color="black", + annotation_clip=False, + ) + draw_points_by_distance( + obj_joint_dist_ax, + obj_joint_dists_k[object_k_index][i], + obj_pt, + obj_joint_color, + joint_names, + [1] * len(joint_names), + ) + + Path(f"{output_dir}/full_feature_vec").mkdir(parents=True, exist_ok=True) + plt.savefig(f"{output_dir}/full_feature_vec/{os.path.basename(image_fn)}") + + def copy_ax_to_new_fig(ax, subfolder): + ax.remove() + + fig2 = plt.figure(figsize=(15, 15)) + ax.figure = fig2 + fig2.axes.append(ax) + fig2.add_axes(ax) + + dummy = fig2.add_subplot(111) + ax.set_position(dummy.get_position()) + dummy.remove() + + Path(f"{output_dir}/{subfolder}").mkdir(parents=True, exist_ok=True) + plt.savefig(f"{output_dir}/{subfolder}/{os.path.basename(image_fn)}") + + plt.close(fig2) + + # Save each subplot as its own image + for ax, subfolder, flag in zip( + [ + lh_dist_ax, + rh_dist_ax, + im_center_dist_ax, + obj_joint_dist_ax, + lh_joint_dist_ax, + rh_joint_dist_ax, + ], + [ + "left_hand_obj_dist", + "right_hand_obj_dist", + "image_center_obj_dist", + "obj_joints_dist", + "left_hand_joints_dist", + "right_hand_joints_dist", + ], + flags, + ): + if not flag: + continue + copy_ax_to_new_fig(ax, subfolder) + + plt.close(fig) + + +def obj_det2d_set_to_feature_by_method( + label_vec: List[str], + xs: List[float], + ys: List[float], + ws: List[float], + hs: List[float], + label_confidences: List[float], + pose_keypoints: List[Dict], + obj_label_to_ind: Dict[str, int], + top_k_objects: int = 1, + use_activation: bool = False, + use_hand_dist: bool = False, + use_center_dist: bool = False, + use_intersection: bool = False, + use_joint_hand_offset: bool = False, + use_joint_object_offset: bool = False, +): """ - ######################### - # Default values - ######################### - default_dist = (0, 0) # (1280 * 2, 720 * 2) - default_center_dist = (0, 0) # (1280, 720) - default_bbox = [0, 0, 0, 0] # [0, 0, 1280, 720] - default_center = ([[0]], [[0]]) # kwimage.Boxes([default_bbox], "xywh").center - width, height = 1280, 720 - image_center = width // 2 + :param label_vec: List of object labels for each detection (length: # detections) + :param xs: List of x values for each detection (length: # detections) + :param ys: List of y values for each detection (length: # detections) + :param ws: List of width values for each detection 
(length: # detections) + :param hs: List of height values for each detection (length: # detections) + :param label_confidences: List of confidence values for each detection (length: # detections) + :param pose_keypoints: + List of joints, represented by a dictionary contining the x and y corrdinates of the points and the category id and string + :param obj_label_to_ind: + Dictionary mapping a label str and returns the index within the feature vector. + :param top_k_objects: Number top confidence objects to use per label, defaults to 1 + :param use_activation: If True, add the confidence values of the detections to the feature vector, defaults to False + :param use_hand_dist: If True, add the distance of the detection centers to both hand centers to the feature vector, defaults to False + :param use_intersection: If True, add the intersection of the detection boxes with the hand boxes to the feature vector, defaults to False + :param use_joint_hand_offset: If True, add the distance of the hand centers to the patient joints to the feature vector, defaults to False + :param use_joint_object_offset: If True, add the distance of the object centers to the patient joints to the feature vector, defaults to False + :return: + resulting feature data + """ ######################### # Data ######################### # Number of object detection classes - hand_labels = ["hand (right)", "hand (left)", "hand", "hands"] - non_objects_labels = ["patient", "user"] - remove_classes_count = [ - 1 for label in non_objects_labels if label in label_to_ind.keys() - ] - num_det_classes = len(label_to_ind) - len( - remove_classes_count - ) # accomedate 2 hands instead of 1, accomedate top 3 objects + num_det_classes = len(obj_label_to_ind) - det_class_max_conf = np.zeros((num_det_classes, top_n_objects)) + # Maximum confidence observe per-class across input object detections. + # If a class has not been observed, it is set to 0 confidence. + det_class_max_conf = np.zeros((num_det_classes, top_k_objects)) # The bounding box of the maximally confident detection - det_class_bbox = np.zeros((top_n_objects, num_det_classes, 4), dtype=np.float64) + det_class_bbox = np.zeros((top_k_objects, num_det_classes, 4), dtype=np.float64) det_class_bbox[:] = default_bbox # Binary mask indicate which detection classes are present on this frame. 
- det_class_mask = np.zeros(num_det_classes, dtype=np.bool_) + det_class_mask = np.zeros((top_k_objects, num_det_classes), dtype=np.bool_) - # print(f"label vec: {label_vec}") + # Record the most confident detection for each object class as recorded in + # `obj_label_to_ind` (confidence & bbox) for i, label in enumerate(label_vec): - if label in label_to_ind: - conf = label_confidences[i] - ind = label_to_ind[label] - det_class_mask[ind] = True - conf_list = det_class_max_conf[ind, :] - if conf > det_class_max_conf[ind].min(): - first_zero = np.where(conf_list == conf_list.min()) # [0][0] - first_zero = first_zero[0][0] - conf_list[first_zero] = conf - obj_index = np.where(conf_list == conf) - - det_class_max_conf[ind] = conf_list - det_class_bbox[obj_index, ind] = [xs[i], ys[i], ws[i], hs[i]] # xywh + assert label in obj_label_to_ind, f"Label {label} is unknown" + + conf = label_confidences[i] + ind = obj_label_to_ind[label] + + conf_list = det_class_max_conf[ind, :] + if conf > det_class_max_conf[ind].min(): + # Replace the lowest confidence object with our new higher confidence object + min_conf_ind = np.where(conf_list == conf_list.min())[0][0] + + conf_list[min_conf_ind] = conf + det_class_bbox[min_conf_ind, ind] = [xs[i], ys[i], ws[i], hs[i]] + det_class_mask[min_conf_ind, ind] = True + + # Sort the confidences to determine the top_k order + sorted_index = np.argsort(conf_list)[::-1] + sorted_conf_list = np.array([conf_list[k] for k in sorted_index]) + + # Reorder the values to match the confidence top_k order + det_class_max_conf[ind] = sorted_conf_list + + bboxes = det_class_bbox.copy() + mask = det_class_mask.copy() + for idx, sorted_ind in enumerate(sorted_index): + det_class_bbox[idx, ind] = bboxes[sorted_ind, ind] + det_class_mask[idx, ind] = mask[sorted_ind, ind] det_class_kwboxes = kwimage.Boxes(det_class_bbox, "xywh") @@ -284,7 +796,7 @@ def obj_det2d_set_to_feature_by_method( # util functions ######################### def find_hand(hand_str): - hand_idx = label_to_ind[hand_str] + hand_idx = obj_label_to_ind[hand_str] hand_conf = det_class_max_conf[hand_idx][0] hand_bbox = kwimage.Boxes([det_class_bbox[0, hand_idx]], "xywh") @@ -292,8 +804,8 @@ def find_hand(hand_str): def dist_to_center(center1, center2): center_dist = [ - center2[0][0][0] - center1[0][0][0], - center2[1][0][0] - center1[1][0][0], + center1[0][0][0] - center2[0][0][0], + center1[1][0][0] - center2[1][0][0], ] return center_dist @@ -310,83 +822,114 @@ def dist_to_center(center1, center2): "hand (left)" ) - RIGHT_IDX = 0 - LEFT_IDX = 1 right_left_hand_kwboxes = det_class_kwboxes[0, [right_hand_idx, left_hand_idx]] + # Mask detailing hand presence in the scene. - hand_mask = det_class_mask[[right_hand_idx, left_hand_idx]] - # 2-D mask object class gate per hand - hand_by_object_mask = np.dot(hand_mask[:, None], det_class_mask[None, :]) + RIGHT_IDX = 0 + LEFT_IDX = 1 + hand_mask = [det_class_mask[0][right_hand_idx], det_class_mask[0][left_hand_idx]] + # Mask detailing hand and object presence in the scene. 
+ hand_by_object_mask_k = np.zeros( + (top_k_objects, 2, num_det_classes), dtype=np.bool_ + ) + + for object_k_index in range(top_k_objects): + x = np.array( + [ + [ + hand_mask[RIGHT_IDX] and det_class + for det_class in det_class_mask[object_k_index] + ], + [ + hand_mask[LEFT_IDX] and det_class + for det_class in det_class_mask[object_k_index] + ], + ] + ) + hand_by_object_mask_k[object_k_index] = x ######################### - # Distances + # Hand distances ######################### if use_hand_dist: # Compute distances to the right and left hands. Distance to the hand # is defined by `hand.center - object.center`. # `kwcoco.Boxes.center` returns a tuple of two arrays, each shaped # [n_boxes, 1]. - all_obj_centers_x, all_obj_centers_y = det_class_kwboxes.center # [n_dets, 1] hand_centers_x, hand_centers_y = right_left_hand_kwboxes.center # [2, 1] - # Hand distances from objects. Shape: [2, n_dets] - right_hand_dist_n = np.zeros( - (top_n_objects, hand_by_object_mask.shape[1], hand_by_object_mask.shape[0]) - ) - left_hand_dist_n = np.zeros( - (top_n_objects, hand_by_object_mask.shape[1], hand_by_object_mask.shape[0]) - ) - # print(f"left_hand_dist_n: {left_hand_dist_n.shape}") - for object_index in range(top_n_objects): - obj_centers_x = all_obj_centers_x[object_index] - obj_centers_y = all_obj_centers_y[object_index] # [n_dets, 1] + + # Hand distances from objects. Shape: [top_k, n_dets, 2] + right_hand_dist_k = np.zeros((top_k_objects, num_det_classes, 2)) + left_hand_dist_k = np.zeros((top_k_objects, num_det_classes, 2)) + for object_k_index in range(top_k_objects): + obj_centers_x = all_obj_centers_x[object_k_index] + obj_centers_y = all_obj_centers_y[object_k_index] hand_dist_x = np.subtract( hand_centers_x, obj_centers_x.T, - where=hand_by_object_mask, + where=hand_by_object_mask_k[object_k_index], # required, otherwise indices may be left uninitialized. - out=np.zeros(shape=hand_by_object_mask.shape), + out=np.zeros(shape=(2, num_det_classes)), ) hand_dist_y = np.subtract( hand_centers_y, obj_centers_y.T, - where=hand_by_object_mask, + where=hand_by_object_mask_k[object_k_index], # required, otherwise indices may be left uninitialized. - out=np.zeros(shape=hand_by_object_mask.shape), + out=np.zeros(shape=(2, num_det_classes)), ) + # Collate into arrays of (x, y) coordinates. right_hand_dist = np.stack( [hand_dist_x[RIGHT_IDX], hand_dist_y[RIGHT_IDX]], axis=1 ) + # for dist in right_hand_dist: + # if not hand_by_object_mask_k[object_k_index][RIGHT_IDX] left_hand_dist = np.stack( [hand_dist_x[LEFT_IDX], hand_dist_y[LEFT_IDX]], axis=1 ) - right_hand_dist_n[object_index] = right_hand_dist - left_hand_dist_n[object_index] = left_hand_dist + right_hand_dist_k[object_k_index] = right_hand_dist + left_hand_dist_k[object_k_index] = left_hand_dist else: - right_hand_dist = left_hand_dist = None + right_hand_dist_k = left_hand_dist_k = None + ######################### + # Image center + # distances + ######################### if use_center_dist: - image_center = kwimage.Boxes([default_bbox], "xywh").center + image_center = kwimage.Boxes( + [0, 0, 1280, 720], "xywh" + ).center # Hard coded image size default_center_dist = [image_center[0][0][0] * 2, image_center[1][0][0] * 2] - distances_to_center = [] - for i in range(num_det_classes): - obj_conf = det_class_max_conf[i] + # Object distances from image center. 
Shape: [top_k, n_dets, 2] + image_center_obj_dist_k = np.zeros((top_k_objects, num_det_classes, 2)) + for object_k_index in range(top_k_objects): + obj_centers_x = all_obj_centers_x[object_k_index] + obj_centers_y = all_obj_centers_y[object_k_index] - obj_bbox = kwimage.Boxes([det_class_bbox[i]], "xywh") - obj_center = obj_bbox.center + for obj_ind in range(num_det_classes): + obj_conf = det_class_max_conf[obj_ind] - center_dist = ( - dist_to_center(image_center, obj_center) - if obj_conf != 0 - else default_center_dist - ) + obj_bbox = kwimage.Boxes( + [det_class_bbox[object_k_index][obj_ind]], "xywh" + ) + obj_center = obj_bbox.center + + center_dist = ( + dist_to_center(image_center, obj_center) + if obj_conf != 0 + else default_center_dist + ) - distances_to_center.append(center_dist) + image_center_obj_dist_k[object_k_index][obj_ind] = center_dist + else: + image_center_obj_dist_k = None ######################### # Intersection @@ -397,17 +940,13 @@ def dist_to_center(center1, center2): # intersected by the representative object bounding-box. # If a hand or object is not present in the scene, then their # respective intersection area is 0. - right_hand_intersection_n = np.zeros( - (top_n_objects, hand_by_object_mask.shape[1]) - ) - left_hand_intersection_n = np.zeros( - (top_n_objects, hand_by_object_mask.shape[1]) - ) - for object_index in range(top_n_objects): + # Shape: [top_k, n_dets] + right_hand_intersection_k = np.zeros((top_k_objects, num_det_classes)) + left_hand_intersection_k = np.zeros((top_k_objects, num_det_classes)) + for object_k_index in range(top_k_objects): + obj_bboxes = det_class_kwboxes[object_k_index] - hand_obj_intersection_vol = right_left_hand_kwboxes.isect_area( - det_class_kwboxes[object_index] - ) + hand_obj_intersection_vol = right_left_hand_kwboxes.isect_area(obj_bboxes) right_left_hand_area = right_left_hand_kwboxes.area # Handling avoiding div-by-zero using the `where` parameter. @@ -419,32 +958,73 @@ def dist_to_center(center1, center2): # indices where `right_left_hand_area == 0`. 
out=np.zeros_like(hand_obj_intersection_vol), ) - right_hand_intersection = hand_obj_intersection[0] left_hand_intersection = hand_obj_intersection[1] - right_hand_intersection_n[object_index] = right_hand_intersection - left_hand_intersection_n[object_index] = left_hand_intersection - + right_hand_intersection_k[object_k_index] = right_hand_intersection + left_hand_intersection_k[object_k_index] = left_hand_intersection else: - right_hand_intersection = left_hand_intersection = None + right_hand_intersection_k = left_hand_intersection_k = None ######################### - # Feature vector + # Joints ######################### - feature_vec = [] - # Add hand data - for object_index in range(top_n_objects): + def calc_joint_offset(bbox_center_x, bbox_center_y): + offset_vector = [] + if pose_keypoints == zero_joint_offset or ( + bbox_center_x == default_center_list[0] + and bbox_center_y == default_center_list[1] + ): + # If we don't have the joints or the object, return default values + for joint in pose_keypoints: + offset_vector.append(default_dist) + return offset_vector + + for joint in pose_keypoints: + jx, jy = joint["xy"] + joint_point = [jx, jy] + + dist = [bbox_center_x - joint_point[0], bbox_center_y - joint_point[1]] + offset_vector.append(dist) + + return offset_vector + + # HAND - JOINTS + if use_joint_hand_offset: + joint_right_hand_offset = calc_joint_offset( + right_hand_center[0][0][0], right_hand_center[1][0][0] + ) + joint_left_hand_offset = calc_joint_offset( + left_hand_center[0][0][0], left_hand_center[1][0][0] + ) + + # OBJECTS - JOINTS + if use_joint_object_offset: + # Object distances from patient joints. Shape: [top_k, n_dets, 22, 2] + obj_joints_dist_k = np.zeros((top_k_objects, num_det_classes, 22, 2)) + for object_k_index in range(top_k_objects): + obj_centers_x = all_obj_centers_x[object_k_index] + obj_centers_y = all_obj_centers_y[object_k_index] - right_hand_dist = right_hand_dist_n[object_index] - left_hand_dist = left_hand_dist_n[object_index] + joint_object_offset = [] + for obj_ind in range(num_det_classes): + offset_vector = calc_joint_offset( + obj_centers_x[obj_ind], obj_centers_y[obj_ind] + ) + joint_object_offset.append(offset_vector) - right_hand_intersection = right_hand_intersection_n[object_index] - left_hand_intersection = left_hand_intersection_n[object_index] + obj_joints_dist_k[object_k_index] = joint_object_offset + + ######################### + # Feature vector + ######################### + feature_vec = [] - for hand_conf, hand_idx, hand_dist, hand_intersection in [ - (right_hand_conf, right_hand_idx, right_hand_dist, right_hand_intersection), - (left_hand_conf, left_hand_idx, left_hand_dist, left_hand_intersection), + for object_k_index in range(top_k_objects): + # HANDS + for hand_conf, hand_idx, hand_dist in [ + (right_hand_conf, right_hand_idx, right_hand_dist_k[object_k_index]), + (left_hand_conf, left_hand_idx, left_hand_dist_k[object_k_index]), ]: if use_activation: feature_vec.append([hand_conf]) @@ -457,32 +1037,47 @@ def dist_to_center(center1, center2): ] feature_vec.append(hd1) if use_center_dist: - feature_vec.append(distances_to_center[hand_idx]) - - # print(f"top-N objects feature_vec: {len(feature_vec)}") - # Add distance and intersection between hands. 
- # This is already there since the hands are in dets_class + feature_vec.append(image_center_obj_dist_k[0][hand_idx]) + # RIGHT-LEFT HAND if use_hand_dist: - feature_vec.append(right_hand_dist[left_hand_idx]) - + feature_vec.append(right_hand_dist_k[0][left_hand_idx]) if use_intersection: - feature_vec.append([right_hand_intersection[left_hand_idx]]) + feature_vec.append([right_hand_intersection_k[0][left_hand_idx]]) - # Add object data - for i in range(num_det_classes): - if i in [right_hand_idx, left_hand_idx]: + # OBJECTS + for obj_ind in range(num_det_classes): + if obj_ind in [right_hand_idx, left_hand_idx]: # We already have the hand data continue if use_activation: - feature_vec.append([det_class_max_conf[i][object_index]]) + feature_vec.append([det_class_max_conf[obj_ind][object_k_index]]) if use_intersection: - feature_vec.append([right_hand_intersection[i]]) - feature_vec.append([left_hand_intersection[i]]) + feature_vec.append([right_hand_intersection_k[object_k_index][obj_ind]]) + feature_vec.append([left_hand_intersection_k[object_k_index][obj_ind]]) if use_center_dist: - feature_vec.append(distances_to_center[i]) + feature_vec.append(image_center_obj_dist_k[object_k_index][obj_ind]) + + # HANDS-JOINTS + if use_joint_hand_offset: + for lh_offset in joint_left_hand_offset: + feature_vec.append(lh_offset) + + for rh_offset in joint_right_hand_offset: + feature_vec.append(rh_offset) + + # OBJ-JOINTS + if use_joint_object_offset: + for object_k_index in range(top_k_objects): + for obj_ind in range(num_det_classes): + if obj_ind in [right_hand_idx, left_hand_idx]: + # We already have the hand data + continue + for offset in obj_joints_dist_k[object_k_index][obj_ind]: + feature_vec.append(offset) feature_vec = [item for sublist in feature_vec for item in sublist] # flatten + feature_vec = np.array(feature_vec, dtype=np.float64) return feature_vec diff --git a/angel_system/data/common/cli/str_to_id_csv.py b/angel_system/data/common/cli/str_to_id_csv.py index 349af6155..5fc369e4f 100644 --- a/angel_system/data/common/cli/str_to_id_csv.py +++ b/angel_system/data/common/cli/str_to_id_csv.py @@ -4,12 +4,7 @@ import csv import argparse -from angel_system.data.common.load_data import ( - activities_from_dive_csv, - objs_as_dataframe, - time_from_name, - sanitize_str, -) +from angel_system.data.common.load_data import sanitize_str def str_to_id(activity_config_fn, activity_gt_dir): diff --git a/angel_system/data/common/create_custom_learn_video_dataset.py b/angel_system/data/common/create_custom_learn_video_dataset.py index 90e21c6c9..9a8b9db1d 100644 --- a/angel_system/data/common/create_custom_learn_video_dataset.py +++ b/angel_system/data/common/create_custom_learn_video_dataset.py @@ -65,9 +65,9 @@ def main(args): ): temp_df = df[df["# 1: Detection or Track-id"] == str(label)] if temp_df.iloc[0]["10-11+: Repeated Species"] not in label_dict.keys(): - label_dict[ - temp_df.iloc[0]["10-11+: Repeated Species"] - ] = label_counter + label_dict[temp_df.iloc[0]["10-11+: Repeated Species"]] = ( + label_counter + ) label_counter += 1 min_frame = pd.to_numeric(temp_df["3: Unique Frame Identifier"]).min() max_frame = pd.to_numeric(temp_df["3: Unique Frame Identifier"]).max() diff --git a/angel_system/data/common/kwcoco_utils.py b/angel_system/data/common/kwcoco_utils.py index 47fac3564..8446c26fe 100644 --- a/angel_system/data/common/kwcoco_utils.py +++ b/angel_system/data/common/kwcoco_utils.py @@ -26,8 +26,8 @@ from angel_system.data.common.load_data import ( activities_from_dive_csv, 
objs_as_dataframe, - time_from_name, sanitize_str, + time_from_name, ) from angel_system.data.common.load_data import Re_order @@ -49,7 +49,7 @@ def load_kwcoco(dset): return dset -def add_activity_gt_to_kwcoco(task, dset): +def add_activity_gt_to_kwcoco(topic, task, dset, activity_config_fn): """Takes an existing kwcoco file and fills in the "activity_gt" field on each image based on the activity annotations. @@ -61,9 +61,18 @@ def add_activity_gt_to_kwcoco(task, dset): # Load kwcoco file dset = load_kwcoco(dset) - data_dir = f"/data/PTG/{task}/" + data_dir = f"/data/PTG/{topic}/" activity_gt_dir = f"{data_dir}/activity_anns" + # Load activity config + with open(activity_config_fn, "r") as stream: + activity_config = yaml.safe_load(stream) + activity_labels = activity_config["labels"] + label_version = activity_config["version"] + + activity_gt_dir = f"{activity_gt_dir}/{task}_labels/" + + # Add ground truth to kwcoco for video_id in dset.index.videos.keys(): video = dset.index.videos[video_id] video_name = video["name"] @@ -71,18 +80,11 @@ def add_activity_gt_to_kwcoco(task, dset): if "_extracted" in video_name: video_name = video_name.split("_extracted")[0] - video_skill = "m2" # video["recipe"] - with open( - f"../config/activity_labels/{task}/task_{video_skill}.yaml", "r" - ) as stream: - recipe_activity_config = yaml.safe_load(stream) - recipe_activity_labels = recipe_activity_config["labels"] - - recipe_activity_gt_dir = f"{activity_gt_dir}/{video_skill}_labels/" - - activity_gt_fn = f"{recipe_activity_gt_dir}/{video_name}_activity_labels_v2.csv" - gt = activities_from_dive_csv(activity_gt_fn) + activity_gt_fn = ( + f"{activity_gt_dir}/{video_name}_activity_labels_v{label_version}.csv" + ) + gt = activities_from_dive_csv(topic, activity_gt_fn) gt = objs_as_dataframe(gt) image_ids = dset.index.vidid_to_gids[video_id] @@ -90,9 +92,14 @@ def add_activity_gt_to_kwcoco(task, dset): # Update the activity gt for each image for gid in sorted(image_ids): im = dset.imgs[gid] - frame_idx, time = time_from_name(im["file_name"]) + frame_idx, time = time_from_name(im["file_name"], topic) - matching_gt = gt.loc[(gt["start"] <= time) & (gt["end"] >= time)] + if time: + matching_gt = gt.loc[(gt["start"] <= time) & (gt["end"] >= time)] + else: + matching_gt = gt.loc[ + (gt["start_frame"] <= frame_idx) & (gt["end_frame"] >= frame_idx) + ] if matching_gt.empty: label = "background" @@ -105,9 +112,7 @@ def add_activity_gt_to_kwcoco(task, dset): try: activity = [ - x - for x in recipe_activity_labels - if int(x["id"]) == int(float(label)) + x for x in activity_labels if int(x["id"]) == int(float(label)) ] except: activity = [] @@ -131,6 +136,7 @@ def add_activity_gt_to_kwcoco(task, dset): # dset.fpath = dset.fpath.split(".")[0] + "_fixed.mscoco.json" dset.dump(dset.fpath, newlines=True) + return dset def visualize_kwcoco_by_label(dset=None, save_dir=""): @@ -163,9 +169,6 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""): fn = im["file_name"].split("/")[-1] gt = im.get("activity_gt", "") - if not gt: - gt = "" - # act_pred = im.get("activity_pred", "") fig, ax = plt.subplots() # title = f"GT: {gt}, PRED: {act_pred}" @@ -179,13 +182,12 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""): aids = gid_to_aids[gid] anns = ub.dict_subset(dset.anns, aids) - using_contact = False + for aid, ann in anns.items(): conf = ann.get("confidence", 1) # if conf < 0.1: # continue - x, y, w, h = ann["bbox"] # xywh cat_id = ann["category_id"] cat = dset.cats[cat_id]["name"] @@ -193,6 +195,8 @@ def 
visualize_kwcoco_by_label(dset=None, save_dir=""): color = colors[obj_labels.index(cat)] + # bbox + x, y, w, h = ann["bbox"] # xywh rect = patches.Rectangle( (x, y), w, @@ -204,6 +208,44 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""): ) ax.add_patch(rect) + + # keypoints + if "keypoints" in ann.keys(): + kp_connections = { + "nose": ["mouth"], + "mouth": ["throat"], + "throat": ["chest", "left_upper_arm", "right_upper_arm"], + "chest": ["back"], + "left_upper_arm": ["left_lower_arm"], + "left_lower_arm": ["left_wrist"], + "left_wrist": ["left_hand"], + "right_upper_arm": ["right_lower_arm"], + "right_lower_arm": ["right_wrist"], + "right_wrist": ["right_hand"], + "back": ["left_upper_leg", "right_upper_leg"], + "left_upper_leg": ["left_knee"], + "left_knee": ["left_lower_leg"], + "left_lower_leg": ["left_foot"], + "right_upper_leg": ["right_knee"], + "right_knee": ["right_lower_leg"], + "right_lower_leg": ["right_foot"], + } + kps = {} + + for kp in ann["keypoints"]: + kps[kp["keypoint_category"]] = kp["xy"] + + for kp_cat, connects in kp_connections.items(): + for connect in connects: + pt1 = kps[kp_cat] + pt2 = kps[connect] + ax.plot( + [pt1[0], pt2[0]], + [pt1[1], pt2[1]], + color=color, + marker="o", + ) + ax.annotate(label, (x, y), color="black", annotation_clip=False) video_dir = ( @@ -221,12 +263,12 @@ def visualize_kwcoco_by_label(dset=None, save_dir=""): plt.close("all") -def imgs_to_video(imgs_dir): +def imgs_to_video(imgs_dir, topic): """Convert directory of images to a video""" video_name = imgs_dir.split("/")[-1] + ".avi" images = glob.glob(f"{imgs_dir}/*.png") - images = sorted(images, key=lambda x: time_from_name(x)[0]) + images = sorted(images, key=lambda x: time_from_name(x, topic)[0]) frame = cv2.imread(images[0]) height, width, layers = frame.shape diff --git a/angel_system/data/common/load_data.py b/angel_system/data/common/load_data.py index 122d0f31a..d1801a133 100644 --- a/angel_system/data/common/load_data.py +++ b/angel_system/data/common/load_data.py @@ -30,6 +30,15 @@ def sanitize_str(str_: str): return str_.lower().strip(" .") +def time_from_name(fname, topic="cooking"): + if topic == "medical": + from angel_system.data.medical.load_bbn_data import time_from_name as tfn + elif topic == "cooking": + from angel_system.data.cooking.load_kitware_data import time_from_name as tfn + + return tfn(fname) + + def Re_order(image_list, image_number): img_id_list = [] for img in image_list: @@ -44,33 +53,6 @@ def Re_order(image_list, image_number): return new_list -RE_FILENAME_TIME = re.compile( - r"frame_(?P\d+)_(?P\d+(?:_|.)\d+).(?P\w+)" -) - - -def time_from_name(fname): - """ - Extract the float timestamp from the filename. - - :param fname: Filename of an image in the format - frame___. - - :return: timestamp (float) in seconds - """ - fname = os.path.basename(fname) - match = RE_FILENAME_TIME.match(fname) - time = match.group("ts") - if "_" in time: - time = time.split("_") - time = float(time[0]) + (float(time[1]) * 1e-9) - elif "." in time: - time = float(time) - - frame = match.group("frame") - return int(frame), time - - def load_from_file( gt_fn, detections_fn ) -> Tuple[List[str], pd.DataFrame, pd.DataFrame]: @@ -138,7 +120,7 @@ def load_from_file( return labels, gt, detections -def activities_from_dive_csv(filepath: str) -> List[Activity]: +def activities_from_dive_csv(topic, filepath: str) -> List[Activity]: """ Load from a DIVE output CSV file a sequence of ground truth activity annotations. 
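For reference, a minimal sketch of how the new topic-dispatching `time_from_name` helper above might behave, assuming the cooking and medical implementations introduced later in this diff; the example filenames are hypothetical.

from angel_system.data.common.load_data import time_from_name

# Cooking filenames encode seconds and nanoseconds, so a float timestamp is
# recovered alongside the frame index (hypothetical filename).
frame, ts = time_from_name("frame_102_1234_500000000.png", topic="cooking")
# frame == 102, ts == 1234.5

# Medical (BBN) filenames only encode a frame index; the timestamp comes back
# as None and callers fall back to frame-range matching, as in
# add_activity_gt_to_kwcoco above (hypothetical filename).
frame, ts = time_from_name("M2-13_102.png", topic="medical")
# frame == 102, ts is None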
@@ -149,6 +131,11 @@ def activities_from_dive_csv(filepath: str) -> List[Activity]: :param filepath: Filesystem path to the CSV file. :return: List of loaded activity annotations. """ + if topic == "medical": + from angel_system.data.medical.load_bbn_data import time_from_name + elif topic == "cooking": + from angel_system.data.cooking.load_kitware_data import time_from_name + print(f"Loading ground truth activities from: {filepath}") df = pd.read_csv(filepath) diff --git a/angel_system/data/cooking/load_kitware_data.py b/angel_system/data/cooking/load_kitware_data.py index 5c6d5d99b..e57363545 100644 --- a/angel_system/data/cooking/load_kitware_data.py +++ b/angel_system/data/cooking/load_kitware_data.py @@ -1,13 +1,41 @@ +import os import kwcoco import glob import warnings - +import re import ubelt as ub from angel_system.data.common.load_data import Re_order from angel_system.data.common.kwcoco_utils import load_kwcoco +RE_FILENAME_TIME = re.compile( + r"frame_(?P<frame>\d+)_(?P<ts>\d+(?:_|.)\d+).(?P<ext>\w+)" +) + + +def time_from_name(fname): + """ + Extract the float timestamp from the filename. + + :param fname: Filename of an image in the format + frame_<frame number>_<seconds>_<nanoseconds>.<extension> + + :return: timestamp (float) in seconds + """ + fname = os.path.basename(fname) + match = RE_FILENAME_TIME.match(fname) + time = match.group("ts") + if "_" in time: + time = time.split("_") + time = float(time[0]) + (float(time[1]) * 1e-9) + elif "." in time: + time = float(time) + + frame = match.group("frame") + return int(frame), time + + def object_label_fixes(obj_cat): # Fix some deprecated labels if obj_cat in ["timer", "timer (20)", "timer (30)", "timer (else)"]: diff --git a/angel_system/data/medical/load_bbn_data.py b/angel_system/data/medical/load_bbn_data.py index 5c08123bb..9103068b8 100644 --- a/angel_system/data/medical/load_bbn_data.py +++ b/angel_system/data/medical/load_bbn_data.py @@ -4,6 +4,7 @@ This should be run on videos not used during training. """ + import os import re import glob @@ -11,6 +12,7 @@ import kwcoco import kwimage import shutil +import warnings import pandas as pd import numpy as np @@ -21,6 +23,26 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3" +RE_FILENAME_TIME = re.compile(r"(?P\w+)-(?P\d+)_(?P\d+).(?P\w+)") + + +def time_from_name(fname): + """ + Extract the float timestamp from the filename. + + :param fname: Filename of an image in the format + frame_<frame number>_<seconds>_<nanoseconds>.<extension>
+ + :return: timestamp (float) in seconds + """ + fname = os.path.basename(fname) + match = RE_FILENAME_TIME.match(fname) + + frame = match.group("frame") + time = None + return int(frame), time + + def dive_to_activity_file(videos_dir): """DIVE CSV to BBN TXT frame-level annotation file format""" for dive_csv in glob.glob(f"{videos_dir}/*/*.csv"): @@ -207,42 +229,65 @@ def save_as_kwcoco(classes, data, save_fn="bbn-data.mscoco.json"): dset.dump(dset.fpath, newlines=True) -def activity_label_fixes(activity_label, target): - if activity_label == "put_tourniquet_around": - label = "place-tourniquet" - label_id = 1 - if activity_label == "pulls_tight": - label = "pull-tight" - label_id = 2 - if activity_label == "secures" and target == "velcro_strap": - label = "apply-strap-to-strap-body" - label_id = 3 - if activity_label == "twist" and target == "windlass": - label = "turn-windless" - label_id = 4 - if ( - activity_label == "locks_into_windlass_keeper" - or activity_label == "lock_into_windlass_keeper" - ): - label = "lock-windless" - label_id = 5 - if ( - activity_label == "wraps_remaining_strap_around" - or activity_label == "wrap_remaining_strap_around" - ): - label = "pull-remaining-strap" - label_id = 6 - if activity_label == "secures" and target == "windlass": - label = "secure-strap" - label_id = 7 - if activity_label == "writes_on" and target == "tourniquet_label": - label = "mark-time" - label_id = 8 +def activity_label_fixes(task, activity_label, target): + # print(activity_label, target) + if task == "m2": + if activity_label == "put_tourniquet_around": + label = "place-tourniquet" + label_id = 1 + if activity_label == "pulls_tight": + label = "pull-tight" + label_id = 2 + if activity_label == "secures" and target == "velcro_strap": + label = "apply-strap-to-strap-body" + label_id = 3 + if activity_label == "twist" and target == "windlass": + label = "turn-windless" + label_id = 4 + if ( + activity_label == "locks_into_windlass_keeper" + or activity_label == "lock_into_windlass_keeper" + ): + label = "lock-windless" + label_id = 5 + if ( + activity_label == "wraps_remaining_strap_around" + or activity_label == "wrap_remaining_strap_around" + ): + label = "pull-remaining-strap" + label_id = 6 + if activity_label == "secures" and target == "windlass": + label = "secure-strap" + label_id = 7 + if activity_label == "writes_on" and target == "tourniquet_label": + label = "mark-time" + label_id = 8 + elif task == "r18": + if activity_label == "apply_pressure_to" and target == "casualty_wound": + label = "cover-seal-wound" + label_id = 1 + if ( + activity_label == "grabs" + or activity_label == "opens" + or activity_label == "removes" + or activity_label == "discard" + ) and (target == "hyfin_package" or target == "gauze"): + label = "open-pack" + label_id = 2 + if activity_label == "wipes_gauze_on": + label = "clean-wound-site" + label_id = 3 + if activity_label == "removes" and target == "chest_seal_backing": + label = "peel-seal-backer" + label_id = 4 + if activity_label == "apply" and target == "chest_seal": + label = "place-seal" + label_id = 5 return label, label_id -def bbn_activity_txt_to_csv(root_dir, output_dir): +def bbn_activity_txt_to_csv(task, root_dir, output_dir, label_version): """ Generate DIVE csv format activity annotations from BBN's text annotations @@ -263,21 +308,22 @@ def bbn_activity_txt_to_csv(root_dir, output_dir): # Lab videos action_fns = glob.glob(f"{root_dir}/*/*_skills_frame.txt") if not action_fns: - warnings.warn(f"No text annotations found in 
{root_dir}") + warnings.warn(f"No text annotations found in {root_dir} subfolders") return - for action_txt_fn in action_fns: + for action_txt_fn in sorted(action_fns): track_id = 0 video_dir = os.path.dirname(action_txt_fn) video_name = os.path.basename(video_dir) if video_name in KNOWN_BAD_VIDEOS: continue - action_f = open(action_txt_fn) - lines = action_f.readlines() + print(action_txt_fn) + with open(action_txt_fn) as action_f: + lines = action_f.readlines() # Create output csv - csv_fn = f"{output_dir}/{video_name}_activity_labels_v2.csv" + csv_fn = f"{output_dir}/{video_name}_activity_labels_v{label_version}.csv" csv_f = open(csv_fn, "w") csv_f.write( "# 1: Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes\n" @@ -292,10 +338,10 @@ def bbn_activity_txt_to_csv(root_dir, output_dir): end_frame = int(data[1]) start_frame_fn = os.path.basename( - glob.glob(f"{video_dir}/images/frame_{start_frame}_*.png")[0] + glob.glob(f"{video_dir}/images/*_{start_frame}.png")[0] ) end_frame_fn = os.path.basename( - glob.glob(f"{video_dir}/images/frame_{end_frame}_*.png")[0] + glob.glob(f"{video_dir}/images/*_{end_frame}.png")[0] ) # Determine activity @@ -307,7 +353,7 @@ def bbn_activity_txt_to_csv(root_dir, output_dir): # convert activity_str info to our activity labels # this is hacky: fix later label = None - label, label_id = activity_label_fixes(activity_label, target) + label, label_id = activity_label_fixes(task, activity, target) if label is not None: line1 = f"{track_id},{start_frame_fn},{start_frame},1,1,2,2,1,-1,{label_id},1" @@ -318,7 +364,6 @@ def bbn_activity_txt_to_csv(root_dir, output_dir): csv_f.write(f"{line2}\n") track_id += 1 - action_f.close() csv_f.close() diff --git a/angel_system/global_step_prediction/global_step_predictor.py b/angel_system/global_step_prediction/global_step_predictor.py index 5841879bb..24063bf95 100644 --- a/angel_system/global_step_prediction/global_step_predictor.py +++ b/angel_system/global_step_prediction/global_step_predictor.py @@ -235,9 +235,9 @@ def initialize_new_recipe_tracker(self, recipe, config_fn=None): }, ) - tracker_dict[ - "last_granular_step_per_broad_step" - ] = self.get_last_granular_step_per_broad_step(broad_steps) + tracker_dict["last_granular_step_per_broad_step"] = ( + self.get_last_granular_step_per_broad_step(broad_steps) + ) tracker_dict["recipe"] = recipe tracker_dict["current_broad_step"] = 0 @@ -254,9 +254,9 @@ def initialize_new_recipe_tracker(self, recipe, config_fn=None): tracker_dict["broad_step_to_activity_ids"] = [ self.get_unique(step["activity_ids"]) for step in broad_steps ] - tracker_dict[ - "granular_step_to_activity_id" - ] = self.get_activity_per_granular_step(broad_steps) + tracker_dict["granular_step_to_activity_id"] = ( + self.get_activity_per_granular_step(broad_steps) + ) # Labels tracker_dict["broad_step_to_label"] = [step["label"] for step in broad_steps] @@ -300,9 +300,9 @@ def increment_granular_step(self, tracker_ind): if current_granular_step < num_granular_steps: self.trackers[tracker_ind]["current_granular_step"] += 1 - self.trackers[tracker_ind][ - "current_broad_step" - ] = self.granular_to_broad_step(tracker, current_granular_step) + self.trackers[tracker_ind]["current_broad_step"] = ( + self.granular_to_broad_step(tracker, current_granular_step) + ) elif current_granular_step == num_granular_steps and 
tracker["active"] == True: self.trackers[tracker_ind]["active"] = False else: @@ -335,9 +335,9 @@ def decrement_granular_step(self, tracker_ind): if current_granular_step > 0: self.trackers[tracker_ind]["current_granular_step"] -= 1 - self.trackers[tracker_ind][ - "current_broad_step" - ] = self.granular_to_broad_step(tracker, current_granular_step) + self.trackers[tracker_ind]["current_broad_step"] = ( + self.granular_to_broad_step(tracker, current_granular_step) + ) else: raise Exception( f"Tried to decrement tracker #{tracker_ind}: " diff --git a/angel_system/global_step_prediction/r18_only_experiment.py b/angel_system/global_step_prediction/r18_only_experiment.py index b8e69a34b..52da35ee9 100644 --- a/angel_system/global_step_prediction/r18_only_experiment.py +++ b/angel_system/global_step_prediction/r18_only_experiment.py @@ -7,7 +7,8 @@ from sklearn.metrics import confusion_matrix import scipy.ndimage as ndi -from angel_system.global_step_prediction.global_step_predictor import ( +# from angel_system.global_step_prediction.global_step_predictor import ( +from global_step_predictor import ( GlobalStepPredictor, ) @@ -23,9 +24,10 @@ def run_inference_all_vids( step_predictor = GlobalStepPredictor( recipe_types=["r18"], - activity_config_fpath="/home/local/KHQ/peri.akiva/projects/angel_system/config/activity_labels/r18.yaml", + activity_config_fpath="/home/local/KHQ/cameron.johnson/code/tmp_hannah_code/angel_system/config/activity_labels/medical/r18.yaml", + # activity_config_fpath="/data/PTG/medical/training/activity_classifier/TCN_HPL/logs/r18_pro_data_top_1_objs_feat_v6_NEW_ORDER_win_25/runs/2024-05-08_12-05-20/test_activity_preds.mscoco.json", recipe_config_dict={ - "r18": "/home/local/KHQ/peri.akiva/projects/angel_system/config/tasks/r18.yaml" + "r18": "/home/local/KHQ/cameron.johnson/code/tmp_hannah_code/angel_system/config/tasks/medical/r18.yaml" }, # threshold_multiplier=0.3, # threshold_frame_count=2 @@ -36,7 +38,7 @@ def run_inference_all_vids( else: avg_probs = step_predictor.compute_average_TP_activations(coco_train) np.save( - "/home/local/KHQ/peri.akiva/projects/angel_system/model_files/global_step_predictor_act_avgs_all_classes.npy", + "/home/local/KHQ/cameron.johnson/code/tmp_hannah_code/angel_system/model_files/global_step_predictor_act_avgs_all_classes.npy", avg_probs, ) print(f"average_probs = {avg_probs}") @@ -135,11 +137,11 @@ def get_unique(activity_ids): if __name__ == "__main__": coco_train = kwcoco.CocoDataset( - "/home/local/KHQ/peri.akiva/projects/angel_system/model_files/coco/r18_test_activity_preds.mscoco.json" + "/data/PTG/medical/training/activity_classifier/TCN_HPL/logs/r18_pro_data_top_1_objs_feat_v6_NEW_ORDER_win_25/runs/2024-05-08_12-05-20/test_activity_preds.mscoco.json" ) # Same file for now since I don't have another. 
coco_test = kwcoco.CocoDataset( - "/home/local/KHQ/peri.akiva/projects/angel_system/model_files/coco/r18_test_activity_preds.mscoco.json" + "/data/PTG/medical/training/activity_classifier/TCN_HPL/logs/r18_pro_data_top_1_objs_feat_v6_NEW_ORDER_win_25/runs/2024-05-08_12-05-20/test_activity_preds.mscoco.json" ) recipe_config = {"r18": "config/tasks/medical/r18.yaml"} diff --git a/ansible/roles/provision-files/vars/main.yml b/ansible/roles/provision-files/vars/main.yml index 246c199ab..c52857443 100644 --- a/ansible/roles/provision-files/vars/main.yml +++ b/ansible/roles/provision-files/vars/main.yml @@ -122,16 +122,16 @@ girder_file_downloads: sha512: 7183385f8eaca85997725a107a76034de2bd4a59c1434b4bdb7c1ac8931cf4b68a53f6e736734643386364b9f0856de795a14965b6a02bc5eb5891252e6a73c9 dest: "{{ stage_dirs.object_detector }}/r18_det.pt" # Activity classifier - - file_id: 6605dfc78b763ca20ae99f8c - sha512: 0091b751e046f7816061ab11e99ffc709dd5506453e711407a7a5925d168f29ce9fd2a3d97eeeb6f08008aa62d76c7a99279024d27ce42cbd9544a590c528960 + - file_id: 663e7294687336214e7cdc07 + sha512: b0166110745c39bb14447ddde34f789fcf7808dc8443a5dc1371de484e995e4fc24b426d1b884321ac51f63c88560ccbbed3a09be5006afa69445ec8d9e04151 dest: "{{ stage_dirs.activity_classifier }}/r18_tcn.ckpt" - file_id: 6606b6e9aa5c8de3874c3f4a sha512: 3c84333390ee6b12327bb7e5debed37149c7f95cc437b16939f77b599d1a0b3b8c4f0995820b95973170b94df695494000a1c45fbf46632267c212e125fe58a3 dest: "{{ stage_dirs.activity_classifier }}/r18_mapping.txt" # Global Step predictor model - - file_id: 660ebb1caa5c8de3874c43c9 - sha512: 20c168a220626aaa07192c234366cbc4e998314430632e18b7ea7eb4b1bff491bcd22f131a67ba7713ade9efb49d43ce37010fdc398890deaff13c6022373667 - dest: "{{ stage_dirs.task_monitor }}/r18_test_activity_preds.mscoco.json" + - file_id: 663e4c5d687336214e7cdbff + sha512: 12b5bf950f64d9609182f795f7243edb6438b3919d920f0f527127660bb632abd5917ed149c28aa5ebcef7e7f6d8ed6edff39b27286eb0ce37a1ff7de2da4c77 + dest: "{{ stage_dirs.task_monitor }}/global_step_predictor_act_avgs_R18.npy" # List of git repositories to check out at a specific ref and then archive. # Destination files will be written as GZipped TAR files, so please suffix diff --git a/config/object_labels/medical/r18.yaml b/config/object_labels/medical/r18.yaml index 1016122e4..d0a561e79 100644 --- a/config/object_labels/medical/r18.yaml +++ b/config/object_labels/medical/r18.yaml @@ -1,4 +1,4 @@ -version: "1.0" +version: "1" title: "R18" labels: # Item: diff --git a/python-tpl/TCN_HPL b/python-tpl/TCN_HPL index abb162229..33a8c9236 160000 --- a/python-tpl/TCN_HPL +++ b/python-tpl/TCN_HPL @@ -1 +1 @@ -Subproject commit abb16222925e7c1c6adba97dce458ae3bd0fdd62 +Subproject commit 33a8c92365d147a5ccb8878661eeadefd7600d14 diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 185866cbd..dc374b375 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -3,6 +3,7 @@ Use get_hydra_config to get cfg dict, use eval.py content as how-to-call example using trainer.predict(model=model, dataloaders=dataloaders, ckpt_path=cfg.ckpt_path) """ + import json from heapq import heappush, heappop from pathlib import Path @@ -53,6 +54,10 @@ PARAM_MODEL_WEIGHTS = "model_weights" # Filesystem path to the class mapping file. 
PARAM_MODEL_MAPPING = "model_mapping" +# Bool flag to indicate if the NormalizePixelPts augmentation should be applied +PARAM_MODEL_NORMALIZE_PIXEL_PTS = "model_normalize_pixel_pts" +# Bool flag to indicate if the NormalizeFromCenter augmentation should be applied +PARAM_MODEL_NORMALIZE_CENTER_PTS = "model_normalize_center_pts" # Filesystem path to the input object detection label mapping. # This is expected to be a JSON file containing a list of strings. PARAM_MODEL_OD_MAPPING = "model_det_label_mapping" @@ -88,6 +93,8 @@ PARAM_POSE_TOPIC = "pose_topic" +PARAM_TOPIC = "topic" + class NoActivityClassification(Exception): """ @@ -115,10 +122,12 @@ def __init__(self): (PARAM_ACT_TOPIC,), (PARAM_MODEL_WEIGHTS,), (PARAM_MODEL_MAPPING,), + (PARAM_MODEL_NORMALIZE_PIXEL_PTS, False), + (PARAM_MODEL_NORMALIZE_CENTER_PTS, False), (PARAM_MODEL_OD_MAPPING,), (PARAM_MODEL_DEVICE, "cuda"), (PARAM_MODEL_DETS_CONV_VERSION, 6), - (PARAM_WINDOW_FRAME_SIZE, 45), + (PARAM_WINDOW_FRAME_SIZE, 25), (PARAM_BUFFER_MAX_SIZE_SECONDS, 15), (PARAM_IMAGE_PIX_WIDTH, 1280), (PARAM_IMAGE_PIX_HEIGHT, 720), @@ -126,6 +135,7 @@ def __init__(self): (PARAM_OUTPUT_COCO_FILEPATH, ""), (PARAM_INPUT_COCO_FILEPATH, ""), (PARAM_TIME_TRACE_LOGGING, True), + (PARAM_TOPIC, "medical"), ], ) self._img_ts_topic = param_values[PARAM_IMG_TS_TOPIC] @@ -138,6 +148,10 @@ def __init__(self): self._img_pix_height = param_values[PARAM_IMAGE_PIX_HEIGHT] self._enable_trace_logging = param_values[PARAM_TIME_TRACE_LOGGING] + self.model_normalize_pixel_pts = param_values[PARAM_MODEL_NORMALIZE_PIXEL_PTS] + self.model_normalize_center_pts = param_values[PARAM_MODEL_NORMALIZE_CENTER_PTS] + + self.topic = param_values[PARAM_TOPIC] # Load in TCN classification model and weights with SimpleTimer("Loading inference module", log.info): self._model_device = torch.device(param_values[PARAM_MODEL_DEVICE]) @@ -145,6 +159,7 @@ def __init__(self): param_values[PARAM_MODEL_WEIGHTS], param_values[PARAM_MODEL_MAPPING], self._model_device, + topic=self.topic, ).eval() # from pytorch_lightning.utilities.model_summary import summarize # from torchsummary import summary @@ -155,7 +170,9 @@ def __init__(self): print(f"json path: {param_values[PARAM_MODEL_OD_MAPPING]}") with open(param_values[PARAM_MODEL_OD_MAPPING]) as infile: det_label_list = json.load(infile) - self._det_label_to_id = {c: i for i, c in enumerate(det_label_list)} + self._det_label_to_id = { + c: i for i, c in enumerate(det_label_list) if c not in ["patient", "user"] + } print(self._det_label_to_id) # Feature version aligned with model current architecture self._feat_version = param_values[PARAM_MODEL_DETS_CONV_VERSION] @@ -388,9 +405,9 @@ def _thread_populate_from_coco(self, input_coco_path: Path) -> None: # Creates [n_det, n_label] matrix, which we assign to and then # ravel into the message slot. 
conf_mat = np.zeros((n_dets, len(obj_labels)), dtype=np.float64) - conf_mat[ - np.arange(n_dets), image_annots.get("category_id") - ] = image_annots.get("confidence") + conf_mat[np.arange(n_dets), image_annots.get("category_id")] = ( + image_annots.get("confidence") + ) det_msg.label_confidences.extend(conf_mat.ravel()) # Calling the image callback last since image frames define the @@ -724,6 +741,8 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: image_width=self._img_pix_width, image_height=self._img_pix_height, feature_memo=memo_object_to_feats, + normalize_pixel_pts=self.model_normalize_pixel_pts, + normalize_center_pts=self.model_normalize_center_pts, ) # except ValueError: # # feature detections were all None diff --git a/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py b/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py index 28da1663d..a77c28a71 100644 --- a/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py +++ b/ros/angel_system_nodes/angel_system_nodes/eval/mitll_eval_2_logger.py @@ -6,6 +6,7 @@ This node is currently only compatible with the `global_step_predictor` task monitoring node due to leveraging specific implementation/output semantics. """ + import csv import math from pathlib import Path diff --git a/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py b/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py index b8b9c4851..0207ebf71 100644 --- a/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py +++ b/ros/angel_system_nodes/angel_system_nodes/object_detection/object_and_hand_detection.py @@ -89,7 +89,7 @@ def __init__(self): self._inference_img_size, ) log.info( - f"Loaded model with classes:\n" + f"Loaded object model with classes:\n" + "\n".join(f'\t- "{n}"' for n in self.object_model.names) ) @@ -112,7 +112,12 @@ def __init__(self): callback_group=MutuallyExclusiveCallbackGroup(), ) + # Hand model self.hand_model = YOLOv8(self._hand_model_chpt_fp) + log.info( + f"Loaded hand model with classes:\n" + + "\n".join(f'\t- "{n}"' for n in self.hand_model.names) + ) if not self._no_trace: self.object_model = TracedModel( diff --git a/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py b/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py index 4b184db33..5ab3e50d7 100644 --- a/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py +++ b/ros/angel_system_nodes/angel_system_nodes/task_monitoring/global_step_predictor.py @@ -131,11 +131,9 @@ def __init__(self): activity_config_fpath=self._activity_config_file, ) - # model_file = coco file with confidence predictions - coco = kwcoco.CocoDataset(self._model_file) - avg_probs = self.gsp.compute_average_TP_activations(coco) - # self.gsp.get_average_TP_activations_from_file(self._model_file) - # log.info("Global state predictor loaded") + # model_file = pre-computed averages of TP activations + self.gsp.get_average_TP_activations_from_file(self._model_file) + log.info("Global state predictor loaded") # Mapping from recipe to current step. Used to track state changes # of the GSP and determine when to publish a TaskUpdate msg. 
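Since the task-monitor node above now loads pre-computed average true-positive activations instead of deriving them from a COCO file at startup, here is a rough offline sketch of how such a .npy file could be produced, modeled on the r18_only_experiment.py changes earlier in this diff; the file paths are placeholders.

import kwcoco
import numpy as np
from angel_system.global_step_prediction.global_step_predictor import GlobalStepPredictor

# Build a predictor with the same configs the node uses (placeholder paths).
predictor = GlobalStepPredictor(
    recipe_types=["r18"],
    activity_config_fpath="config/activity_labels/medical/r18.yaml",
    recipe_config_dict={"r18": "config/tasks/medical/r18.yaml"},
)

# Average the true-positive activation probabilities over a prediction set...
coco = kwcoco.CocoDataset("r18_test_activity_preds.mscoco.json")  # placeholder
avg_probs = predictor.compute_average_TP_activations(coco)

# ...and save them so the ROS node can simply call
# get_average_TP_activations_from_file() on this file at startup.
np.save("global_step_predictor_act_avgs_R18.npy", avg_probs)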
diff --git a/ros/angel_utils/multi_task_demo_ui/main.html b/ros/angel_utils/multi_task_demo_ui/main.html index 5018f6fb4..1e1aaa081 100644 --- a/ros/angel_utils/multi_task_demo_ui/main.html +++ b/ros/angel_utils/multi_task_demo_ui/main.html @@ -15,7 +15,7 @@ diff --git a/ros/angel_utils/python/angel_utils/conversion.py b/ros/angel_utils/python/angel_utils/conversion.py index a67d7f96d..8cf662c5c 100644 --- a/ros/angel_utils/python/angel_utils/conversion.py +++ b/ros/angel_utils/python/angel_utils/conversion.py @@ -1,6 +1,7 @@ """ Various conversion functions into and out of angel_msg types. """ + import array import itertools import math diff --git a/ros/angel_utils/scripts/bag_extractor.py b/ros/angel_utils/scripts/bag_extractor.py index 8e10ac97a..25947496e 100644 --- a/ros/angel_utils/scripts/bag_extractor.py +++ b/ros/angel_utils/scripts/bag_extractor.py @@ -153,19 +153,19 @@ def __init__(self): if self.extract_head_pose_data or self.extract_depth_head_pose_data: self.msg_type_to_handler_map[HeadsetPoseData] = self.handle_head_pose_msg if self.extract_hand_pose_data: - self.msg_type_to_handler_map[ - HandJointPosesUpdate - ] = self.handle_hand_pose_msg + self.msg_type_to_handler_map[HandJointPosesUpdate] = ( + self.handle_hand_pose_msg + ) if self.extract_spatial_map_data: self.msg_type_to_handler_map[SpatialMesh] = self.handle_spatial_mesh_msg if self.extract_annotation_event_data: - self.msg_type_to_handler_map[ - AnnotationEvent - ] = self.handle_annotation_event_msg + self.msg_type_to_handler_map[AnnotationEvent] = ( + self.handle_annotation_event_msg + ) if self.extract_activity_detection_data: - self.msg_type_to_handler_map[ - ActivityDetection - ] = self.handle_activity_detection_msg + self.msg_type_to_handler_map[ActivityDetection] = ( + self.handle_activity_detection_msg + ) if self.extract_task_update_data: self.msg_type_to_handler_map[TaskUpdate] = self.handle_task_update_msg diff --git a/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py b/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py index 481b57119..1341c085b 100644 --- a/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py +++ b/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py @@ -3,7 +3,7 @@ from tcn_hpl.data.components.augmentations import NormalizePixelPts from angel_system.activity_classification.utils import ( - obj_det2d_set_to_feature_by_method, + obj_det2d_set_to_feature, ) @@ -165,14 +165,14 @@ def test_hand_distance_and_norm(self): "water jug (open)": 5, "water jug lid": 4, }, - # v5 variation - use_activation=True, - use_hand_dist=True, - use_intersection=True, + version=5, + top_n_objects=1, ) - feature_vec = obj_det2d_set_to_feature_by_method(**test_input) + feature_vec = obj_det2d_set_to_feature(**test_input) + print("feature_vec: ", feature_vec) feature_vec_normalized = feature_vec.copy() + print("feature_vec_normalized: ", feature_vec_normalized) norm(feature_vec_normalized[None, ...]) # Where normalization happened, nothing should be out of the [0, 1] diff --git a/tmux/demos/medical/Kitware-R18.yml b/tmux/demos/medical/Kitware-R18.yml index 3209fdd80..98d178283 100644 --- a/tmux/demos/medical/Kitware-R18.yml +++ b/tmux/demos/medical/Kitware-R18.yml @@ -98,7 +98,7 @@ windows: -p task_error_topic:=TaskErrors -p system_command_topic:=SystemCommands -p det_topic:=activity_topic - -p model_file:=${MODEL_DIR}/task_monitor/r18_test_activity_preds.mscoco.json + -p 
model_file:=${MODEL_DIR}/task_monitor/global_step_predictor_act_avgs_R18.npy -p thresh_frame_count:=3 -p deactivate_thresh_frame_count:=10 -p threshold_multiplier_weak:=0.00
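Finally, a small illustrative example of the task-aware activity_label_fixes helper added in load_bbn_data.py earlier in this diff; the call below exercises the new "r18" branch and is a sketch only.

from angel_system.data.medical.load_bbn_data import activity_label_fixes

# "apply" onto "chest_seal" maps to the r18 "place-seal" activity (id 5) per
# the branch added above; unmatched verb/target pairs leave label/label_id
# unassigned in this helper.
label, label_id = activity_label_fixes(
    task="r18", activity_label="apply", target="chest_seal"
)
# label == "place-seal", label_id == 5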