diff --git a/yoeo/detect.py b/yoeo/detect.py index 7f99ac6..18ecaf5 100755 --- a/yoeo/detect.py +++ b/yoeo/detect.py @@ -17,7 +17,9 @@ from imgaug.augmentables.segmaps import SegmentationMapsOnImage from yoeo.models import load_model -from yoeo.utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentation +from yoeo.utils.class_config import ClassConfig +from yoeo.utils.dataclasses import ClassNames, SqueezeConfig +from yoeo.utils.utils import rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentation from yoeo.utils.datasets import ImageFolder from yoeo.utils.transforms import Resize, DEFAULT_TRANSFORMS @@ -26,7 +28,7 @@ from matplotlib.ticker import NullLocator -def detect_directory(model_path, weights_path, img_path, classes, output_path, +def detect_directory(model_path, weights_path, img_path, class_config: ClassConfig, output_path, batch_size=8, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.5, robot_class_ids: Optional[List[int]] = None): """Detects objects on all images in specified directory and saves output images with drawn detections. @@ -37,8 +39,8 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path, :type weights_path: str :param img_path: Path to directory with images to inference :type img_path: str - :param classes: List of class names - :type classes: [str] + :param class_config: Class configuration + :type class_config: ClassConfig :param output_path: Path to output directory :type output_path: str :param batch_size: Size of each image batch, defaults to 8 @@ -63,29 +65,36 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path, output_path, conf_thres, nms_thres, - robot_class_ids=robot_class_ids + class_config.get_squeeze_config() ) _draw_and_save_output_images( - img_detections, segmentations, imgs, img_size, output_path, classes) + img_detections, segmentations, imgs, img_size, output_path, class_config.get_unsqueezed_det_class_names()) print(f"---- Detections were saved to: '{output_path}' ----") -def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5, robot_class_ids: Optional[List[int]] = None): +def detect_image(model, + image: np.ndarray, + img_size: int = 416, + conf_thres: float = 0.5, + nms_thres: float = 0.5, + squeeze_config: Optional[SqueezeConfig] = None + ): """Inferences one image with model. :param model: Model for inference :type model: models.Darknet :param image: Image to inference - :type image: nd.array + :type image: np.ndarray :param img_size: Size of each image dimension for yolo, defaults to 416 - :type img_size: int, optional + :type img_size: int :param conf_thres: Object confidence threshold, defaults to 0.5 - :type conf_thres: float, optional + :type conf_thres: float :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5 - :type nms_thres: float, optional - :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. - :type robot_class_ids: List[int], optional + :type nms_thres: float + :param squeeze_config: SqueezeConfiguration for this model (optional, defaults to None) + :type squeeze_config: Optional[SqueezeConfig] + :return: Detections on image with each detection in the format: [x1, y1, x2, y2, confidence, class], Segmentation as 2d numpy array with the coresponding class id in each cell :rtype: nd.array, nd.array """ @@ -105,13 +114,24 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5, robo # Get detections with torch.no_grad(): detections, segmentations = model(input_img) - detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids) + detections = non_max_suppression( + prediction=detections, + conf_thres=conf_thres, + iou_thres=nms_thres, + squeeze_config=squeeze_config + ) detections = rescale_boxes(detections[0], img_size, image.shape[0:2]) segmentations = rescale_segmentation(segmentations, image.shape[0:2]) return detections.numpy(), segmentations.cpu().detach().numpy() -def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_ids: Optional[List[int]] = None): +def detect(model, + dataloader: DataLoader, + output_path: str, + conf_thres: float = 0.5, + nms_thres: float = 0.5, + squeeze_config: Optional[SqueezeConfig] = None + ): """Inferences images with model. :param model: Model for inference @@ -121,11 +141,12 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_id :param output_path: Path to output directory :type output_path: str :param conf_thres: Object confidence threshold, defaults to 0.5 - :type conf_thres: float, optional + :type conf_thres: float :param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5 - :type nms_thres: float, optional - :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. - :type robot_class_ids: List[int], optional + :type nms_thres: float + :param squeeze_config: SqueezeConfiguration for this model (optional, defaults to None) + :type squeeze_config: Optional[SqueezeConfig] + :return: List of detections. The coordinates are given for the padded image that is provided by the dataloader. Use `utils.rescale_boxes` to transform them into the desired input image coordinate system before its transformed by the dataloader), List of input image paths @@ -149,7 +170,12 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_id # Get detections with torch.no_grad(): detections, segmentations = model(input_imgs) - detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids) + detections = non_max_suppression( + prediction=detections, + conf_thres=conf_thres, + iou_thres=nms_thres, + squeeze_config=squeeze_config + ) # Store image and detections img_detections.extend(detections) @@ -310,33 +336,24 @@ def run(): parser.add_argument("--n_cpu", type=int, default=8, help="Number of cpu threads to use during batch generation") parser.add_argument("--conf_thres", type=float, default=0.5, help="Object confidence threshold") parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression") - parser.add_argument("--multiple_robot_classes", action="store_true", - help="If multiple robot classes exist and nms shall be performed across all robot classes") + parser.add_argument("--class_config", type=str, default="class_config/default.yaml", help="Class configuration for evaluation") args = parser.parse_args() print(f"Command line arguments: {args}") - # Extract class names from file - classes = load_classes(args.classes)['detection'] # List of class names - - robot_class_ids = None - if args.multiple_robot_classes: - robot_class_ids = [] - for idx, c in enumerate(classes): - if "robot" in c: - robot_class_ids.append(idx) + class_names = ClassNames.load_from(args.classes) + class_config = ClassConfig.load_from(args.class_config, class_names) detect_directory( args.model, args.weights, args.images, - classes, + class_config, args.output, batch_size=args.batch_size, img_size=args.img_size, n_cpu=args.n_cpu, conf_thres=args.conf_thres, nms_thres=args.nms_thres, - robot_class_ids=robot_class_ids ) diff --git a/yoeo/test.py b/yoeo/test.py index d266572..d93d1b7 100755 --- a/yoeo/test.py +++ b/yoeo/test.py @@ -1,7 +1,7 @@ #! /usr/bin/env python3 from __future__ import division, annotations -from typing import List, Optional +from typing import List, Optional, Tuple import argparse import tqdm @@ -14,16 +14,18 @@ from torch.autograd import Variable from yoeo.models import load_model -from yoeo.utils.utils import load_classes, ap_per_class, get_batch_statistics, non_max_suppression, to_cpu, xywh2xyxy, \ +from yoeo.utils.utils import ap_per_class, get_batch_statistics, non_max_suppression, to_cpu, xywh2xyxy, \ print_environment_info, seg_iou from yoeo.utils.datasets import ListDataset from yoeo.utils.transforms import DEFAULT_TRANSFORMS +from yoeo.utils.dataclasses import ClassNames +from yoeo.utils.class_config import ClassConfig from yoeo.utils.parse_config import parse_data_config +from yoeo.utils.metric import Metric -def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_size=8, img_size=416, - n_cpu=8, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5, verbose=True, - robot_class_ids: Optional[List[int]] = None): +def evaluate_model_file(model_path, weights_path, img_path, class_config, batch_size=8, img_size=416, + n_cpu=8, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5, verbose=True): """Evaluate model on validation dataset. :param model_path: Path to model definition file (.cfg) @@ -32,8 +34,8 @@ def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_s :type weights_path: str :param img_path: Path to file containing all paths to validation images. :type img_path: str - :param class_names: Dict containing detection and segmentation class names - :type class_names: Dict + :param class_config: Object containing all class name related settings + :type class_config: TrainConfig :param batch_size: Size of each image batch, defaults to 8 :type batch_size: int, optional :param img_size: Size of each image dimension for yolo, defaults to 416 @@ -48,62 +50,82 @@ def evaluate_model_file(model_path, weights_path, img_path, class_names, batch_s :type nms_thres: float, optional :param verbose: If True, prints stats of model, defaults to True :type verbose: bool, optional - :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. - :type robot_class_ids: List[int], optional :return: Returns precision, recall, AP, f1, ap_class """ dataloader = _create_validation_data_loader( img_path, batch_size, img_size, n_cpu) model = load_model(model_path, weights_path) - metrics_output, seg_class_ious = _evaluate( + metrics_output, seg_class_ious, secondary_metric = _evaluate( model, dataloader, - class_names, + class_config, img_size, iou_thres, conf_thres, nms_thres, - verbose, - robot_class_ids=robot_class_ids) - return metrics_output, seg_class_ious + verbose) + return metrics_output, seg_class_ious, secondary_metric -def print_eval_stats(metrics_output, seg_class_ious, class_names, verbose): +def print_eval_stats(metrics_output: Optional[Tuple[np.ndarray]], + seg_class_ious: List[np.float64], + secondary_metric: Optional[Metric], + class_config: ClassConfig, + verbose: bool + ): # Print detection statistics + print("#### Detection ####") if metrics_output is not None: precision, recall, AP, f1, ap_class = metrics_output if verbose: # Prints class AP and mean AP ap_table = [["Index", "Class", "AP"]] + class_names = class_config.get_squeezed_det_class_names() for i, c in enumerate(ap_class): - ap_table += [[c, class_names['detection'][c], "%.5f" % AP[i]]] + ap_table += [[c, class_names[c], "%.5f" % AP[i]]] print(AsciiTable(ap_table).table) print(f"---- mAP {AP.mean():.5f} ----") else: print("---- mAP not measured (no detections found by model) ----") + if secondary_metric is not None: + print("#### Detection - Secondary ####") + mbACC = secondary_metric.mbACC() + + if verbose: + classes = class_config.get_squeeze_class_names() + mbACC_per_class = [secondary_metric.bACC(i) for i in range(len(classes))] + + sec_table = [["Index", "Class", "bACC"]] + for i, c in enumerate(classes): + sec_table += [[i, c, "%.5f" % mbACC_per_class[i]]] + print(AsciiTable(sec_table).table) + + print(f"---- mbACC {mbACC:.5f} ----") + + print("#### Segmentation ####") # Print segmentation statistics if verbose: # Print IoU per segmentation class seg_table = [["Index", "Class", "IoU"]] + class_names = class_config.get_seg_class_names() for i, iou in enumerate(seg_class_ious): - seg_table += [[i, class_names['segmentation'][i], "%.5f" % iou]] + seg_table += [[i, class_names[i], "%.5f" % iou]] print(AsciiTable(seg_table).table) # Print mean IoU mean_seg_class_ious = np.array(seg_class_ious).mean() print(f"----Average IoU {mean_seg_class_ious:.5f} ----") -def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, nms_thres, verbose, - robot_class_ids: Optional[List[int]] = None): +def _evaluate(model, dataloader, class_config, img_size, iou_thres, conf_thres, nms_thres, verbose): """Evaluate model on validation dataset. :param model: Model to evaluate :type model: models.Darknet :param dataloader: Dataloader provides the batches of images with targets :type dataloader: DataLoader - :param class_names: Dict containing detection and segmentation class names - :type class_names: Dict + :param class_config: Object storing all class related settings + :type class_config: TrainConfig :param img_size: Size of each image dimension for yolo :type img_size: int :param iou_thres: IOU threshold required to qualify as detected @@ -114,8 +136,6 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n :type nms_thres: float :param verbose: If True, prints stats of model :type verbose: bool - :param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist. - :type robot_class_ids: List[int], optional :return: Returns precision, recall, AP, f1, ap_class """ model.eval() # Set model to evaluation mode @@ -127,9 +147,21 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n seg_ious = [] import time times = [] + + if class_config.classes_should_be_squeezed(): + secondary_metric = Metric(len(class_config.get_squeeze_ids())) + else: + secondary_metric = None + for _, imgs, bb_targets, mask_targets in tqdm.tqdm(dataloader, desc="Validating"): # Extract labels labels += bb_targets[:, 1].tolist() + + # If a subset of the detection classes should be squeezed into one class for non-maximum suppression and the + # subsequent AP-computation, we need to squeeze those class labels here. + if class_config.classes_should_be_squeezed(): + labels = class_config.squeeze(labels) + # Rescale target bb_targets[:, 2:] = xywh2xyxy(bb_targets[:, 2:]) bb_targets[:, 2:] *= img_size @@ -144,10 +176,20 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n yolo_outputs, conf_thres=conf_thres, iou_thres=nms_thres, - robot_class_ids=robot_class_ids + squeeze_config=class_config.get_squeeze_config() ) - sample_metrics += get_batch_statistics(yolo_outputs, bb_targets, iou_threshold=iou_thres) + sample_stat, secondary_stat = get_batch_statistics( + yolo_outputs, + bb_targets, + iou_threshold=iou_thres, + squeeze_config=class_config.get_squeeze_config() + ) + + sample_metrics += sample_stat + + if class_config.classes_should_be_squeezed(): + secondary_metric += secondary_stat seg_ious.append(seg_iou(to_cpu(segmentation_outputs), mask_targets, model.num_seg_classes)) @@ -160,6 +202,7 @@ def _evaluate(model, dataloader, class_names, img_size, iou_thres, conf_thres, n # Concatenate sample statistics true_positives, pred_scores, pred_labels = [ np.concatenate(x, 0) for x in list(zip(*sample_metrics))] + yolo_metrics_output = ap_per_class( true_positives, pred_scores, pred_labels, labels) @@ -175,9 +218,9 @@ def seg_iou_mean_without_nan(seg_iou: List[float]) -> np.ndarray: seg_class_ious = [seg_iou_mean_without_nan(class_ious) for class_ious in list(zip(*seg_ious))] - print_eval_stats(yolo_metrics_output, seg_class_ious, class_names, verbose) + print_eval_stats(yolo_metrics_output, seg_class_ious, secondary_metric, class_config, verbose) - return yolo_metrics_output, seg_class_ious + return yolo_metrics_output, seg_class_ious, secondary_metric def _create_validation_data_loader(img_path, batch_size, img_size, n_cpu): @@ -221,8 +264,7 @@ def run(): parser.add_argument("--iou_thres", type=float, default=0.5, help="IOU threshold required to qualify as detected") parser.add_argument("--conf_thres", type=float, default=0.01, help="Object confidence threshold") parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression") - parser.add_argument("--multiple_robot_classes", action="store_true", - help="If multiple robot classes exist and nms shall be performed across all robot classes") + parser.add_argument("--class_config", type=str, default="class_config/default.yaml", help="Class configuration for evaluation") args = parser.parse_args() print(f"Command line arguments: {args}") @@ -231,28 +273,22 @@ def run(): data_config = parse_data_config(args.data) # Path to file containing all images for validation valid_path = data_config["valid"] - class_names = load_classes(data_config["names"]) # Detection and segmentation class names - robot_class_ids = None - if args.multiple_robot_classes: - robot_class_ids = [] - for idx, c in enumerate(class_names["detection"]): - if "robot" in c: - robot_class_ids.append(idx) + class_names = ClassNames.load_from(data_config["names"]) # Detection and segmentation class names + class_config = ClassConfig.load_from(args.class_config, class_names) evaluate_model_file( args.model, args.weights, valid_path, - class_names, + class_config, batch_size=args.batch_size, img_size=args.img_size, n_cpu=args.n_cpu, iou_thres=args.iou_thres, conf_thres=args.conf_thres, nms_thres=args.nms_thres, - verbose=True, - robot_class_ids=robot_class_ids + verbose=args.verbose, ) diff --git a/yoeo/train.py b/yoeo/train.py index ef328cd..6f582fe 100755 --- a/yoeo/train.py +++ b/yoeo/train.py @@ -13,12 +13,12 @@ import torch.optim as optim from torch.autograd import Variable -from typing import List, Optional - from yoeo.models import load_model from yoeo.utils.logger import Logger -from yoeo.utils.utils import to_cpu, load_classes, print_environment_info, provide_determinism, worker_seed_set +from yoeo.utils.utils import to_cpu, print_environment_info, provide_determinism, worker_seed_set from yoeo.utils.datasets import ListDataset +from yoeo.utils.dataclasses import ClassNames +from yoeo.utils.class_config import ClassConfig from yoeo.utils.augmentations import AUGMENTATION_TRANSFORMS from yoeo.utils.transforms import DEFAULT_TRANSFORMS from yoeo.utils.parse_config import parse_data_config @@ -80,8 +80,7 @@ def run(): parser.add_argument("--nms_thres", type=float, default=0.5, help="Evaluation: IOU threshold for non-maximum suppression") parser.add_argument("--logdir", type=str, default="logs", help="Directory for training log files (e.g. for TensorBoard)") parser.add_argument("--seed", type=int, default=-1, help="Makes results reproducable. Set -1 to disable.") - parser.add_argument("--multiple_robot_classes", action="store_true", - help="If multiple robot classes exist and nms shall be performed across all robot classes") + parser.add_argument("--class_config", type=str, default="class_config/default.yaml", help="Class configuration for evaluation") args = parser.parse_args() print(f"Command line arguments: {args}") @@ -98,14 +97,9 @@ def run(): data_config = parse_data_config(args.data) train_path = data_config["train"] valid_path = data_config["valid"] - class_names = load_classes(data_config["names"]) - robot_class_ids = None - if args.multiple_robot_classes: - robot_class_ids = [] - for idx, c in enumerate(class_names["detection"]): - if "robot" in c: - robot_class_ids.append(idx) + class_names = ClassNames.load_from(data_config["names"]) + class_config = ClassConfig.load_from(args.class_config, class_names) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -256,13 +250,12 @@ def run(): metrics_output = _evaluate( model, validation_dataloader, - class_names, + class_config=class_config, img_size=model.hyperparams['height'], iou_thres=args.iou_thres, conf_thres=args.conf_thres, nms_thres=args.nms_thres, verbose=args.verbose, - robot_class_ids=robot_class_ids ) if metrics_output is not None: @@ -274,6 +267,10 @@ def run(): ("validation/mAP", AP.mean()), ("validation/f1", f1.mean()), ("validation/seg_iou", np.array(seg_class_ious).mean())] + + if metrics_output[2] is not None: + evaluation_metrics.append(("validation/secondary_mbACC", metrics_output[2].mbACC())) + logger.list_of_scalars_summary(evaluation_metrics, epoch) diff --git a/yoeo/utils/class_config.py b/yoeo/utils/class_config.py index 5fb8ff5..c16d303 100644 --- a/yoeo/utils/class_config.py +++ b/yoeo/utils/class_config.py @@ -79,6 +79,12 @@ def get_squeeze_config(self) -> Optional[SqueezeConfig]: return SqueezeConfig(squeeze_ids, surrogate_id) def get_squeeze_class_names(self) -> List[str]: + """ + Get the class names of the classes that should be squeezed together during evaluation + + :return: a list of class names that should be squeezed together during evaluation + :rtype: List[str] + """ return self._class_names_to_squeeze def get_surrogate_id(self) -> Optional[int]: @@ -92,16 +98,26 @@ def get_surrogate_id(self) -> Optional[int]: """ return None if not self._ids_to_squeeze else self._ids_to_squeeze[0] - def get_det_class_names(self) -> List[str]: + def get_squeezed_det_class_names(self) -> List[str]: """ - Get the (squeezed) list of detection class names. + Get the squeezed list of detection class names. - :return: The (squeezed) list of detection class names. + :return: The squeezed list of detection class names. :rtype: List[str] """ return self._squeezed_det_class_names + def get_unsqueezed_det_class_names(self) -> List[str]: + """ + Get the unsqueezed list of detection class names. + + :return: The unsqueezed list of detection class names. + :rtype: List[str] + """ + + return self._det_class_names + def get_seg_class_names(self) -> List[str]: """ Get the list of segmentation class names. @@ -144,10 +160,20 @@ def classes_should_be_squeezed(self) -> bool: return self._ids_to_squeeze is not None def squeeze(self, labels: List[int]) -> List[int]: + """ + Squeeze a list of class ids. Given a set of classes that should be squeezed X, replace all class ids in X by + the surrogate id. + + :param labels: list of class ids to squeeze. + :type labels: List[int] + + :return: squeezed list of class ids where + :rtype: List[int] + """ surrogate_id = self.get_surrogate_id() return [label if label not in self._ids_to_squeeze else surrogate_id for label in labels] - + @classmethod def load_from(cls, path: str, class_names: ClassNames) -> ClassConfig: content = cls._read_yaml_file(path) diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py index a11bbe8..c526694 100644 --- a/yoeo/utils/utils.py +++ b/yoeo/utils/utils.py @@ -1,7 +1,5 @@ from __future__ import division, annotations -from typing import Tuple - import time import platform import tqdm @@ -11,8 +9,10 @@ import numpy as np import subprocess import random -from typing import List, Optional -import yaml +from typing import List, Optional, Tuple + +from yoeo.utils.dataclasses import SqueezeConfig +from yoeo.utils.metric import Metric def provide_determinism(seed=42): @@ -45,16 +45,6 @@ def to_cpu(tensor): return tensor.detach().cpu() -def load_classes(path: str) -> dict: - with open(path, 'r', encoding="utf-8") as fp: - names = yaml.load(fp, Loader=yaml.SafeLoader) - - assert "detection" in names.keys(), f"Missing key 'detection' in {path}" - assert "segmentation" in names.keys(), f"Missing key 'segmentation' in {path}" - - return names - - def weights_init_normal(m): classname = m.__class__.__name__ if classname.find("Conv") != -1: @@ -298,11 +288,33 @@ def compute_ap(recall, precision): return ap -def get_batch_statistics(outputs, targets, iou_threshold): +def get_batch_statistics(outputs, + targets, + iou_threshold, + squeeze_config: Optional[SqueezeConfig] = None + ) -> Tuple[List, Optional[Metric]]: + """ + Calculcate the batch statistics. If 'squeeze_config' is not 'None', the contained classes will be squeezed into one + class ('SqueezeConfig.surrogate_id') for batch statistics evaluation and evalutated separately on a secondary class + label. The statistics for the latter are returned as a 'Metric' object. If 'squeeze_config' is None, no 'Metric' + object will be returned and the tuple will simply contain 'None' at the respective position. + + :return: The batch statistics, as well as an optional Metric object for the secondary class argument if + 'squeeze_config' is not None + :rtype: Tuple[List, Optional[Metric]] + """ """ Compute true positives, predicted scores and predicted labels per sample """ batch_metrics = [] - for sample_i in range(len(outputs)): + squeeze_active: bool = squeeze_config is not None + + if squeeze_active: + secondary_metric = Metric(len(squeeze_config.squeeze_ids)) + squeeze_ids = torch.tensor(squeeze_config.squeeze_ids) + else: + secondary_metric = None + + for sample_i in range(len(outputs)): if outputs[sample_i] is None: continue @@ -311,16 +323,24 @@ def get_batch_statistics(outputs, targets, iou_threshold): pred_scores = output[:, 4] pred_labels = output[:, -1] + if squeeze_active: + sec_pred_labels = compute_secondary_labels(pred_labels, squeeze_ids) + pred_labels = squeeze_primary_labels(pred_labels, squeeze_ids, squeeze_config.surrogate_id) + true_positives = np.zeros(pred_boxes.shape[0]) annotations = targets[targets[:, 0] == sample_i][:, 1:] target_labels = annotations[:, 0] if len(annotations) else [] + + if squeeze_active and type(target_labels) is not list: + sec_target_labels = compute_secondary_labels(target_labels, squeeze_ids) + target_labels = squeeze_primary_labels(target_labels, squeeze_ids, squeeze_config.surrogate_id) + if len(annotations): detected_boxes = [] target_boxes = annotations[:, 1:] for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): - # If targets are found break if len(detected_boxes) == len(annotations): break @@ -343,8 +363,36 @@ def get_batch_statistics(outputs, targets, iou_threshold): if iou >= iou_threshold and box_index not in detected_boxes: true_positives[pred_i] = 1 detected_boxes += [box_index] + + if squeeze_active: + sec_pred_label = sec_pred_labels[pred_i] + + if pred_label in squeeze_ids: + secondary_metric.update(sec_pred_label.int(), sec_target_labels[box_index].int()) + batch_metrics.append([true_positives, pred_scores, pred_labels]) - return batch_metrics + + return batch_metrics, secondary_metric + +def compute_secondary_labels(labels: torch.tensor, squeeze_ids: torch.tensor) -> torch.tensor: + secondary_labels = labels.clone() + + # We replace the actual class labels with values from {0, ...} for classes that should be squeezed into a + # single class. All other classes get the label -1. + for idx, squeeze_id in enumerate(squeeze_ids): + # Replace label with value in {0, ...} + secondary_labels[labels == squeeze_id] = idx + + # Replace all other labels with -1 + secondary_labels[torch.logical_not(torch.isin(labels, squeeze_ids))] = -1 + + return secondary_labels + +def squeeze_primary_labels(labels: torch.tensor, squeeze_ids: torch.tensor, surrogate_id: int) -> torch.tesor: + # Replace all primary labels that are contained in squeeze_ids with the surrogate_id + labels[torch.isin(labels, squeeze_ids)] = surrogate_id + + return labels def bbox_wh_iou(wh1, wh2): @@ -419,8 +467,11 @@ def box_area(box): def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, - robot_class_ids: Optional[List[int]] = None): - """Performs Non-Maximum Suppression (NMS) on inference results + squeeze_config: Optional[SqueezeConfig] = None): + """ + Performs Non-Maximum Suppression (NMS) on inference results. If 'squeeze_config' is not 'None', the contained + classes will be treated as one class ('SqueezeConfig.surrogate_id') during non-maximum supression. + Returns: detections with shape: nx6 (x1, y1, x2, y2, conf, cls) """ @@ -437,8 +488,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non t = time.time() output = [torch.zeros((0, 6), device="cpu")] * prediction.shape[0] - if robot_class_ids: - robot_class_ids = torch.tensor(robot_class_ids, device=prediction.device, dtype=prediction.dtype) + if squeeze_config: + squeeze_ids = torch.tensor(squeeze_config.squeeze_ids, device=prediction.device, dtype=prediction.dtype) for xi, x in enumerate(prediction): # image index, image inference # Apply constraints @@ -476,13 +527,13 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non x = x[x[:, 4].argsort(descending=True)[:max_nms]] # Batched NMS - if robot_class_ids is None: + if squeeze_config is None: c = x[:, 5:6] * max_wh # classes else: - # If multiple robot classes are present, all robot classes are treated as one class in order to perform - # nms across all classes and not per class. For this, all robot classes get the same offset. + # If for example multiple robot classes are present, all robot classes are treated as one class in order + # to perform nms across all classes and not per class. For this, all robot classes get the same offset. c = torch.clone(x[:, 5:6]) - c[torch.isin(c, robot_class_ids)] = robot_class_ids[0] + c[torch.isin(c, squeeze_ids)] = squeeze_config.surrogate_id c *= max_wh # boxes (offset by class), scores