From a7a4e37089fa139144766ac655fb6c76eb84aee6 Mon Sep 17 00:00:00 2001
From: sunjiahao1999 <578431509@qq.com>
Date: Tue, 5 Dec 2023 17:53:18 +0800
Subject: [PATCH] refactor init

---
 .gitignore                                    |   1 +
 configs/_base_/datasets/waymoD5-3d-3class.py  |  17 +-
 configs/_base_/datasets/waymoD5-3d-car.py     |  15 +-
 mmdet3d/datasets/det3d_dataset.py             |  17 +-
 mmdet3d/datasets/waymo_dataset.py             | 117 ++-
 mmdet3d/engine/hooks/visualization_hook.py    |   4 +-
 .../waymo_utils/prediction_to_waymo.py        | 367 ++-------
 mmdet3d/evaluation/metrics/waymo_metric.py    | 634 ++++------------
 ...-attn_4xb4-cyclic-20e_waymoD5-3d-3class.py |  13 +-
 tools/create_data.py                          | 134 ++--
 tools/create_data.sh                          |   7 +-
 .../dataset_converters/create_gt_database.py  |  28 +-
 tools/dataset_converters/waymo_converter.py   | 696 ++++++++++--------
 13 files changed, 802 insertions(+), 1248 deletions(-)

diff --git a/.gitignore b/.gitignore
index 27cb9c7cb4..2fefc6a904 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,4 @@ data/sunrgbd/OFFICIAL_SUNRGBD/
 # Waymo evaluation
 mmdet3d/evaluation/functional/waymo_utils/compute_detection_metrics_main
 mmdet3d/evaluation/functional/waymo_utils/compute_detection_let_metrics_main
+mmdet3d/evaluation/functional/waymo_utils/compute_segmentation_metrics_main
diff --git a/configs/_base_/datasets/waymoD5-3d-3class.py b/configs/_base_/datasets/waymoD5-3d-3class.py
index e5240b629e..f8f14998d2 100644
--- a/configs/_base_/datasets/waymoD5-3d-3class.py
+++ b/configs/_base_/datasets/waymoD5-3d-3class.py
@@ -89,7 +89,10 @@
             dict(
                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
         ]),
-    dict(type='Pack3DDetInputs', keys=['points'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points'],
+        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
@@ -100,7 +103,10 @@
         load_dim=6,
         use_dim=5,
         backend_args=backend_args),
-    dict(type='Pack3DDetInputs', keys=['points']),
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points'],
+        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
 ]

 train_dataloader = dict(
@@ -164,12 +170,7 @@
             backend_args=backend_args))

 val_evaluator = dict(
-    type='WaymoMetric',
-    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
-    waymo_bin_file='./data/waymo/waymo_format/gt.bin',
-    data_root='./data/waymo/waymo_format',
-    backend_args=backend_args,
-    convert_kitti_format=False)
+    type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
 test_evaluator = val_evaluator

 vis_backends = [dict(type='LocalVisBackend')]
diff --git a/configs/_base_/datasets/waymoD5-3d-car.py b/configs/_base_/datasets/waymoD5-3d-car.py
index f95ac1d817..972e9289be 100644
--- a/configs/_base_/datasets/waymoD5-3d-car.py
+++ b/configs/_base_/datasets/waymoD5-3d-car.py
@@ -62,7 +62,8 @@
     dict(type='PointShuffle'),
     dict(
         type='Pack3DDetInputs',
-        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
+        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
 ]
 test_pipeline = [
     dict(
@@ -86,7 +87,10 @@
             dict(
                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
         ]),
-    dict(type='Pack3DDetInputs', keys=['points'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points'],
+        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
@@ -161,12 +165,7 @@
             backend_args=backend_args))

 val_evaluator = dict(
-    type='WaymoMetric',
-    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
-    waymo_bin_file='./data/waymo/waymo_format/gt.bin',
-    data_root='./data/waymo/waymo_format',
-    convert_kitti_format=False,
-    backend_args=backend_args)
+    type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
 test_evaluator = val_evaluator

 vis_backends = [dict(type='LocalVisBackend')]
diff --git a/mmdet3d/datasets/det3d_dataset.py b/mmdet3d/datasets/det3d_dataset.py
index 11caae4729..c701a893fd 100644
--- a/mmdet3d/datasets/det3d_dataset.py
+++ b/mmdet3d/datasets/det3d_dataset.py
@@ -113,7 +113,7 @@ def __init__(self,
                     ori_label = self.METAINFO['classes'].index(name)
                     self.label_mapping[ori_label] = label_idx

-            self.num_ins_per_cat = {name: 0 for name in metainfo['classes']}
+            self.num_ins_per_cat = [0] * len(metainfo['classes'])
         else:
             self.label_mapping = {
                 i: i
@@ -121,10 +121,7 @@ def __init__(self,
             }
             self.label_mapping[-1] = -1

-            self.num_ins_per_cat = {
-                name: 0
-                for name in self.METAINFO['classes']
-            }
+            self.num_ins_per_cat = [0] * len(self.METAINFO['classes'])

         super().__init__(
             ann_file=ann_file,
@@ -146,9 +143,12 @@ def __init__(self,

         # show statistics of this dataset
         print_log('-' * 30, 'current')
-        print_log(f'The length of the dataset: {len(self)}', 'current')
+        print_log(
+            f'The length of {"test" if self.test_mode else "training"} dataset: {len(self)}',  # noqa: E501
+            'current')
         content_show = [['category', 'number']]
-        for cat_name, num in self.num_ins_per_cat.items():
+        for label, num in enumerate(self.num_ins_per_cat):
+            cat_name = self.metainfo['classes'][label]
             content_show.append([cat_name, num])
         table = AsciiTable(content_show)
         print_log(
@@ -256,8 +256,7 @@ def parse_ann_info(self, info: dict) -> Union[dict, None]:

         for label in ann_info['gt_labels_3d']:
             if label != -1:
-                cat_name = self.metainfo['classes'][label]
-                self.num_ins_per_cat[cat_name] += 1
+                self.num_ins_per_cat[label] += 1

         return ann_info

diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py
index 5b3a83824e..cda27e42e5 100644
--- a/mmdet3d/datasets/waymo_dataset.py
+++ b/mmdet3d/datasets/waymo_dataset.py
@@ -3,9 +3,11 @@
 from typing import Callable, List, Union

 import numpy as np
+from mmengine import print_log
+from mmengine.fileio import load

 from mmdet3d.registry import DATASETS
-from mmdet3d.structures import CameraInstance3DBoxes
+from mmdet3d.structures import CameraInstance3DBoxes, LiDARInstance3DBoxes
 from .det3d_dataset import Det3DDataset
 from .kitti_dataset import KittiDataset

@@ -163,13 +165,10 @@ def parse_ann_info(self, info: dict) -> dict:
             centers_2d = np.zeros((0, 2), dtype=np.float32)
             depths = np.zeros((0), dtype=np.float32)

-        # in waymo, lidar2cam = R0_rect @ Tr_velo_to_cam
-        # convert gt_bboxes_3d to velodyne coordinates with `lidar2cam`
-        lidar2cam = np.array(info['images'][self.default_cam_key]['lidar2cam'])
-        gt_bboxes_3d = CameraInstance3DBoxes(
-            ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d,
-                                                 np.linalg.inv(lidar2cam))
-        ann_info['gt_bboxes_3d'] = gt_bboxes_3d
+        if self.load_type == 'frame_based':
+            gt_bboxes_3d = LiDARInstance3DBoxes(ann_info['gt_bboxes_3d'])
+        else:
+            gt_bboxes_3d = CameraInstance3DBoxes(ann_info['gt_bboxes_3d'])

         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
@@ -182,9 +181,58 @@ def parse_ann_info(self, info: dict) -> dict:
         return anns_results

     def load_data_list(self) -> List[dict]:
-        """Add the load interval."""
-        data_list = super().load_data_list()
-        data_list = data_list[::self.load_interval]
+        """Add the load interval.
+
+        Returns:
+            list[dict]: A list of annotations.
+        """  # noqa: E501
+        # `self.ann_file` denotes the absolute annotation file path if
+        # `self.root=None` or relative path if `self.root=/path/to/data/`.
+        annotations = load(self.ann_file)
+        if not isinstance(annotations, dict):
+            raise TypeError(f'The annotations loaded from annotation file '
+                            f'should be a dict, but got {type(annotations)}!')
+        if 'data_list' not in annotations or 'metainfo' not in annotations:
+            raise ValueError('Annotation must have data_list and metainfo '
+                             'keys')
+        metainfo = annotations['metainfo']
+        raw_data_list = annotations['data_list']
+        raw_data_list = raw_data_list[::self.load_interval]
+        if self.load_interval > 1:
+            print_log(
+                f'Sample size will be reduced to 1/{self.load_interval} of'
+                ' the original data sample',
+                logger='current')
+
+        # Meta information loaded from the annotation file will not influence
+        # the existing meta information loaded from `BaseDataset.METAINFO`
+        # and the `metainfo` argument defined in the constructor.
+        for k, v in metainfo.items():
+            self._metainfo.setdefault(k, v)
+
+        # load and parse data_infos.
+        data_list = []
+        for raw_data_info in raw_data_list:
+            # parse raw data information to target format
+            data_info = self.parse_data_info(raw_data_info)
+            if isinstance(data_info, dict):
+                # For image tasks, `data_info` should contain the information
+                # of a single image, such as dict(img_path='xxx', width=360, ...)
+                data_list.append(data_info)
+            elif isinstance(data_info, list):
+                # For video tasks, `data_info` could contain image
+                # information of multiple frames, such as
+                # [dict(video_path='xxx', timestamps=...),
+                #  dict(video_path='xxx', timestamps=...)]
+                for item in data_info:
+                    if not isinstance(item, dict):
+                        raise TypeError('data_info must be list of dict, but '
+                                        f'got {type(item)}')
+                data_list.extend(data_info)
+            else:
+                raise TypeError('data_info should be a dict or list of dict, '
+                                f'but got {type(data_info)}')
+
+        return data_list

     def parse_data_info(self, info: dict) -> Union[dict, List[dict]]:
@@ -203,44 +251,39 @@ def parse_data_info(self, info: dict) -> Union[dict, List[dict]]:
                 info['images'][self.default_cam_key]
             info['images'] = new_image_info
             info['instances'] = info['cam_instances'][self.default_cam_key]
-            return super().parse_data_info(info)
+            return Det3DDataset.parse_data_info(self, info)
         else:
             # in the mono3d, the instances is from cam sync.
+ # Convert frame-based infos to multi-view image-based data_list = [] - if self.modality['use_lidar']: - info['lidar_points']['lidar_path'] = \ - osp.join( - self.data_prefix.get('pts', ''), - info['lidar_points']['lidar_path']) - - if self.modality['use_camera']: - for cam_key, img_info in info['images'].items(): - if 'img_path' in img_info: - cam_prefix = self.data_prefix.get(cam_key, '') - img_info['img_path'] = osp.join( - cam_prefix, img_info['img_path']) - for (cam_key, img_info) in info['images'].items(): camera_info = dict() + camera_info['sample_idx'] = info['sample_idx'] + camera_info['timestamp'] = info['timestamp'] + camera_info['context_name'] = info['context_name'] camera_info['images'] = dict() camera_info['images'][cam_key] = img_info - if 'cam_instances' in info \ - and cam_key in info['cam_instances']: - camera_info['instances'] = info['cam_instances'][cam_key] + if 'img_path' in img_info: + cam_prefix = self.data_prefix.get(cam_key, '') + camera_info['images'][cam_key]['img_path'] = osp.join( + cam_prefix, img_info['img_path']) + if 'lidar2cam' in img_info: + camera_info['lidar2cam'] = np.array(img_info['lidar2cam']) + if 'cam2img' in img_info: + camera_info['cam2img'] = np.array(img_info['cam2img']) + if 'lidar2img' in img_info: + camera_info['lidar2img'] = np.array(img_info['lidar2img']) else: - camera_info['instances'] = [] - camera_info['ego2global'] = info['ego2global'] - if 'image_sweeps' in info: - camera_info['image_sweeps'] = info['image_sweeps'] - - # TODO check if need to modify the sample id - # TODO check when will use it except for evaluation. - camera_info['sample_idx'] = info['sample_idx'] + camera_info['lidar2img'] = camera_info[ + 'cam2img'] @ camera_info['lidar2cam'] if not self.test_mode: # used in training + camera_info['instances'] = info['cam_instances'][cam_key] camera_info['ann_info'] = self.parse_ann_info(camera_info) if self.test_mode and self.load_eval_anns: - info['eval_ann_info'] = self.parse_ann_info(info) + camera_info['instances'] = info['cam_instances'][cam_key] + camera_info['eval_ann_info'] = self.parse_ann_info( + camera_info) data_list.append(camera_info) return data_list diff --git a/mmdet3d/engine/hooks/visualization_hook.py b/mmdet3d/engine/hooks/visualization_hook.py index ffec1addc3..9de46d9692 100644 --- a/mmdet3d/engine/hooks/visualization_hook.py +++ b/mmdet3d/engine/hooks/visualization_hook.py @@ -78,11 +78,11 @@ def __init__(self, 'needs to be excluded.') self.vis_task = vis_task - if wait_time == -1: + if show and wait_time == -1: print_log( 'Manual control mode, press [Right] to next sample.', logger='current') - else: + elif show: print_log( 'Autoplay mode, press [SPACE] to pause.', logger='current') self.wait_time = wait_time diff --git a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py index b9da8043d2..c1729e7b89 100644 --- a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py +++ b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py @@ -4,7 +4,6 @@ """ try: - from waymo_open_dataset import dataset_pb2 as open_dataset from waymo_open_dataset import label_pb2 from waymo_open_dataset.protos import metrics_pb2 from waymo_open_dataset.protos.metrics_pb2 import Objects @@ -14,13 +13,10 @@ 'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" ' 'to install the official devkit first.') -from glob import glob -from os.path import join -from typing import List, Optional +from typing import List import mmengine 
-import numpy as np
-import tensorflow as tf
+from mmengine import print_log


 class Prediction2Waymo(object):
@@ -32,54 +28,22 @@ class Prediction2Waymo(object):

     Args:
         results (list[dict]): Prediction results.
-        waymo_tfrecords_dir (str): Directory to load waymo raw data.
         waymo_results_save_dir (str): Directory to save converted predictions
             in waymo format (.bin files).
         waymo_results_final_path (str): Path to save combined
             predictions in waymo format (.bin file), like 'a/b/c.bin'.
-        prefix (str): Prefix of filename. In general, 0 for training, 1 for
-            validation and 2 for testing.
-        classes (dict): A list of class name.
-        workers (str): Number of parallel processes. Defaults to 2.
-        backend_args (dict, optional): Arguments to instantiate the
-            corresponding backend. Defaults to None.
-        from_kitti_format (bool, optional): Whether the reuslts are kitti
-            format. Defaults to False.
-        idx2metainfo (Optional[dict], optional): The mapping from sample_idx to
-            metainfo. The metainfo must contain the keys: 'idx2contextname' and
-            'idx2timestamp'. Defaults to None.
+        num_workers (int): Number of parallel processes. Defaults to 4.
     """

     def __init__(self,
                  results: List[dict],
-                 waymo_tfrecords_dir: str,
-                 waymo_results_save_dir: str,
                  waymo_results_final_path: str,
-                 prefix: str,
                  classes: dict,
-                 workers: int = 2,
-                 backend_args: Optional[dict] = None,
-                 from_kitti_format: bool = False,
-                 idx2metainfo: Optional[dict] = None):
-
+                 num_workers: int = 4):
         self.results = results
-        self.waymo_tfrecords_dir = waymo_tfrecords_dir
-        self.waymo_results_save_dir = waymo_results_save_dir
         self.waymo_results_final_path = waymo_results_final_path
-        self.prefix = prefix
         self.classes = classes
-        self.workers = int(workers)
-        self.backend_args = backend_args
-        self.from_kitti_format = from_kitti_format
-        if idx2metainfo is not None:
-            self.idx2metainfo = idx2metainfo
-            # If ``fast_eval``, the metainfo does not need to be read from
-            # original data online. It's preprocessed offline.
-            self.fast_eval = True
-        else:
-            self.fast_eval = False
-
-        self.name2idx = {}
+        self.num_workers = num_workers

         self.k2w_cls_map = {
             'Car': label_pb2.Label.TYPE_VEHICLE,
@@ -88,193 +52,7 @@ def __init__(self,
             'Cyclist': label_pb2.Label.TYPE_CYCLIST,
         }

-        if self.from_kitti_format:
-            self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],
-                                                [-1.0, 0.0, 0.0, 0.0],
-                                                [0.0, -1.0, 0.0, 0.0],
-                                                [0.0, 0.0, 0.0, 1.0]])
-            # ``sample_idx`` of the sample in kitti-format is an array
-            for idx, result in enumerate(results):
-                if len(result['sample_idx']) > 0:
-                    self.name2idx[str(result['sample_idx'][0])] = idx
-        else:
-            # ``sample_idx`` of the sample in the original prediction
-            # is an int value.
- for idx, result in enumerate(results): - self.name2idx[str(result['sample_idx'])] = idx - - if not self.fast_eval: - # need to read original '.tfrecord' file - self.get_file_names() - # turn on eager execution for older tensorflow versions - if int(tf.__version__.split('.')[0]) < 2: - tf.enable_eager_execution() - - self.create_folder() - - def get_file_names(self): - """Get file names of waymo raw data.""" - if 'path_mapping' in self.backend_args: - for path in self.backend_args['path_mapping'].keys(): - if path in self.waymo_tfrecords_dir: - self.waymo_tfrecords_dir = \ - self.waymo_tfrecords_dir.replace( - path, self.backend_args['path_mapping'][path]) - from petrel_client.client import Client - client = Client() - contents = client.list(self.waymo_tfrecords_dir) - self.waymo_tfrecord_pathnames = list() - for content in sorted(list(contents)): - if content.endswith('tfrecord'): - self.waymo_tfrecord_pathnames.append( - join(self.waymo_tfrecords_dir, content)) - else: - self.waymo_tfrecord_pathnames = sorted( - glob(join(self.waymo_tfrecords_dir, '*.tfrecord'))) - print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.') - - def create_folder(self): - """Create folder for data conversion.""" - mmengine.mkdir_or_exist(self.waymo_results_save_dir) - - def parse_objects(self, kitti_result, T_k2w, context_name, - frame_timestamp_micros): - """Parse one prediction with several instances in kitti format and - convert them to `Object` proto. - - Args: - kitti_result (dict): Predictions in kitti format. - - - name (np.ndarray): Class labels of predictions. - - dimensions (np.ndarray): Height, width, length of boxes. - - location (np.ndarray): Bottom center of boxes (x, y, z). - - rotation_y (np.ndarray): Orientation of boxes. - - score (np.ndarray): Scores of predictions. - T_k2w (np.ndarray): Transformation matrix from kitti to waymo. - context_name (str): Context name of the frame. - frame_timestamp_micros (int): Frame timestamp. - - Returns: - :obj:`Object`: Predictions in waymo dataset Object proto. - """ - - def parse_one_object(instance_idx): - """Parse one instance in kitti format and convert them to `Object` - proto. - - Args: - instance_idx (int): Index of the instance to be converted. - - Returns: - :obj:`Object`: Predicted instance in waymo dataset - Object proto. 
- """ - cls = kitti_result['name'][instance_idx] - length = round(kitti_result['dimensions'][instance_idx, 0], 4) - height = round(kitti_result['dimensions'][instance_idx, 1], 4) - width = round(kitti_result['dimensions'][instance_idx, 2], 4) - x = round(kitti_result['location'][instance_idx, 0], 4) - y = round(kitti_result['location'][instance_idx, 1], 4) - z = round(kitti_result['location'][instance_idx, 2], 4) - rotation_y = round(kitti_result['rotation_y'][instance_idx], 4) - score = round(kitti_result['score'][instance_idx], 4) - - # y: downwards; move box origin from bottom center (kitti) to - # true center (waymo) - y -= height / 2 - # frame transformation: kitti -> waymo - x, y, z = self.transform(T_k2w, x, y, z) - - # different conventions - heading = -(rotation_y + np.pi / 2) - while heading < -np.pi: - heading += 2 * np.pi - while heading > np.pi: - heading -= 2 * np.pi - - box = label_pb2.Label.Box() - box.center_x = x - box.center_y = y - box.center_z = z - box.length = length - box.width = width - box.height = height - box.heading = heading - - o = metrics_pb2.Object() - o.object.box.CopyFrom(box) - o.object.type = self.k2w_cls_map[cls] - o.score = score - - o.context_name = context_name - o.frame_timestamp_micros = frame_timestamp_micros - - return o - - objects = metrics_pb2.Objects() - - for instance_idx in range(len(kitti_result['name'])): - o = parse_one_object(instance_idx) - objects.objects.append(o) - - return objects - - def convert_one(self, file_idx): - """Convert action for single file. - - Args: - file_idx (int): Index of the file to be converted. - """ - file_pathname = self.waymo_tfrecord_pathnames[file_idx] - if 's3://' in file_pathname and tf.__version__ >= '2.6.0': - try: - import tensorflow_io as tfio # noqa: F401 - except ImportError: - raise ImportError( - "Please run 'pip install tensorflow-io' to install tensorflow_io first." # noqa: E501 - ) - file_data = tf.data.TFRecordDataset(file_pathname, compression_type='') - - for frame_num, frame_data in enumerate(file_data): - frame = open_dataset.Frame() - frame.ParseFromString(bytearray(frame_data.numpy())) - - filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}' - - context_name = frame.context.name - frame_timestamp_micros = frame.timestamp_micros - - if filename in self.name2idx: - if self.from_kitti_format: - for camera in frame.context.camera_calibrations: - # FRONT = 1, see dataset.proto for details - if camera.name == 1: - T_front_cam_to_vehicle = np.array( - camera.extrinsic.transform).reshape(4, 4) - - T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam - - kitti_result = \ - self.results[self.name2idx[filename]] - objects = self.parse_objects(kitti_result, T_k2w, - context_name, - frame_timestamp_micros) - else: - index = self.name2idx[filename] - objects = self.parse_objects_from_origin( - self.results[index], context_name, - frame_timestamp_micros) - - else: - print(filename, 'not found.') - objects = metrics_pb2.Objects() - - with open( - join(self.waymo_results_save_dir, f'{filename}.bin'), - 'wb') as f: - f.write(objects.SerializeToString()) - - def convert_one_fast(self, res_index: int): + def convert_one(self, res_index: int): """Convert action for single file. It read the metainfo from the preprocessed file offline and will be faster. @@ -282,19 +60,16 @@ def convert_one_fast(self, res_index: int): res_index (int): The indices of the results. 
""" sample_idx = self.results[res_index]['sample_idx'] - if len(self.results[res_index]['pred_instances_3d']) > 0: + if len(self.results[res_index]['labels_3d']) > 0: objects = self.parse_objects_from_origin( self.results[res_index], - self.idx2metainfo[str(sample_idx)]['contextname'], - self.idx2metainfo[str(sample_idx)]['timestamp']) + self.results[res_index]['context_name'], + self.results[res_index]['timestamp']) else: print(sample_idx, 'not found.') objects = metrics_pb2.Objects() - with open( - join(self.waymo_results_save_dir, f'{sample_idx}.bin'), - 'wb') as f: - f.write(objects.SerializeToString()) + return objects def parse_objects_from_origin(self, result: dict, contextname: str, timestamp: str) -> Objects: @@ -308,112 +83,56 @@ def parse_objects_from_origin(self, result: dict, contextname: str, Returns: metrics_pb2.Objects: The parsed object. """ - lidar_boxes = result['pred_instances_3d']['bboxes_3d'].tensor - scores = result['pred_instances_3d']['scores_3d'] - labels = result['pred_instances_3d']['labels_3d'] - - def parse_one_object(index): - class_name = self.classes[labels[index].item()] + lidar_boxes = result['bboxes_3d'] + scores = result['scores_3d'] + labels = result['labels_3d'] + objects = metrics_pb2.Objects() + for lidar_box, score, label in zip(lidar_boxes, scores, labels): + # Parse one object box = label_pb2.Label.Box() - height = lidar_boxes[index][5].item() - heading = lidar_boxes[index][6].item() - - while heading < -np.pi: - heading += 2 * np.pi - while heading > np.pi: - heading -= 2 * np.pi - - box.center_x = lidar_boxes[index][0].item() - box.center_y = lidar_boxes[index][1].item() - box.center_z = lidar_boxes[index][2].item() + height / 2 - box.length = lidar_boxes[index][3].item() - box.width = lidar_boxes[index][4].item() + height = lidar_box[5] + heading = lidar_box[6] + + box.center_x = lidar_box[0] + box.center_y = lidar_box[1] + box.center_z = lidar_box[2] + height / 2 + box.length = lidar_box[3] + box.width = lidar_box[4] box.height = height box.heading = heading - o = metrics_pb2.Object() - o.object.box.CopyFrom(box) - o.object.type = self.k2w_cls_map[class_name] - o.score = scores[index].item() - o.context_name = contextname - o.frame_timestamp_micros = timestamp + object = metrics_pb2.Object() + object.object.box.CopyFrom(box) - return o - - objects = metrics_pb2.Objects() - for i in range(len(lidar_boxes)): - objects.objects.append(parse_one_object(i)) + class_name = self.classes[label] + object.object.type = self.k2w_cls_map[class_name] + object.score = score + object.context_name = contextname + object.frame_timestamp_micros = timestamp + objects.objects.append(object) return objects def convert(self): """Convert action.""" - print('Start converting ...') - convert_func = self.convert_one_fast if self.fast_eval else \ - self.convert_one + print_log('Start converting ...', logger='current') - # from torch.multiprocessing import set_sharing_strategy - # # Force using "file_system" sharing strategy for stability - # set_sharing_strategy("file_system") + # TODO: use parallel processes. + # objects_list = mmengine.track_parallel_progress( + # self.convert_one, range(len(self)), self.num_workers) - # mmengine.track_parallel_progress(convert_func, range(len(self)), - # self.workers) + objects_list = mmengine.track_progress(self.convert_one, + range(len(self))) - # TODO: Support multiprocessing. Now, multiprocessing evaluation will - # cause shared memory error in torch-1.10 and torch-1.11. 
Details can - # be seen in https://github.com/pytorch/pytorch/issues/67864. - prog_bar = mmengine.ProgressBar(len(self)) - for i in range(len(self)): - convert_func(i) - prog_bar.update() - - print('\nFinished ...') - - # combine all files into one .bin - pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin'))) - combined = self.combine(pathnames) + combined = metrics_pb2.Objects() + for objects in objects_list: + for o in objects.objects: + combined.objects.append(o) with open(self.waymo_results_final_path, 'wb') as f: f.write(combined.SerializeToString()) def __len__(self): """Length of the filename list.""" - return len(self.results) if self.fast_eval else len( - self.waymo_tfrecord_pathnames) - - def transform(self, T, x, y, z): - """Transform the coordinates with matrix T. - - Args: - T (np.ndarray): Transformation matrix. - x(float): Coordinate in x axis. - y(float): Coordinate in y axis. - z(float): Coordinate in z axis. - - Returns: - list: Coordinates after transformation. - """ - pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1) - pt_aft = np.matmul(T, pt_bef) - return pt_aft[:3].flatten().tolist() - - def combine(self, pathnames): - """Combine predictions in waymo format for each sample together. - - Args: - pathnames (str): Paths to save predictions. - - Returns: - :obj:`Objects`: Combined predictions in Objects proto. - """ - combined = metrics_pb2.Objects() - - for pathname in pathnames: - objects = metrics_pb2.Objects() - with open(pathname, 'rb') as f: - objects.ParseFromString(f.read()) - for o in objects.objects: - combined.objects.append(o) - - return combined + return len(self.results) diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py index 0dd69a5c24..41fe429ba8 100644 --- a/mmdet3d/evaluation/metrics/waymo_metric.py +++ b/mmdet3d/evaluation/metrics/waymo_metric.py @@ -1,54 +1,29 @@ # Copyright (c) OpenMMLab. All rights reserved. import tempfile from os import path as osp -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union -import mmengine import numpy as np import torch -from mmengine import Config, load +from mmengine import Config +from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger, print_log from mmdet3d.models.layers import box3d_multiclass_nms from mmdet3d.registry import METRICS from mmdet3d.structures import (Box3DMode, CameraInstance3DBoxes, - LiDARInstance3DBoxes, bbox3d2result, - points_cam2img, xywhr2xyxyr) -from .kitti_metric import KittiMetric + LiDARInstance3DBoxes, points_cam2img, + xywhr2xyxyr) @METRICS.register_module() -class WaymoMetric(KittiMetric): +class WaymoMetric(BaseMetric): """Waymo evaluation metric. Args: - ann_file (str): The path of the annotation file in kitti format. waymo_bin_file (str): The path of the annotation file in waymo format. - data_root (str): Path of dataset root. Used for storing waymo - evaluation programs. - split (str): The split of the evaluation set. Defaults to 'training'. metric (str or List[str]): Metrics to be evaluated. Defaults to 'mAP'. - pcd_limit_range (List[float]): The range of point cloud used to filter - invalid predicted boxes. Defaults to [-85, -85, -5, 85, 85, 5]. - convert_kitti_format (bool): Whether to convert the results to kitti - format. Now, in order to be compatible with camera-based methods, - defaults to True. 
-        prefix (str, optional): The prefix that will be added in the metric
-            names to disambiguate homonymous metrics of different evaluators.
-            If prefix is not provided in the argument, self.default_prefix will
-            be used instead. Defaults to None.
-        format_only (bool): Format the output results without perform
-            evaluation. It is useful when you want to format the result to a
-            specific format and submit it to the test server.
-            Defaults to False.
-        pklfile_prefix (str, optional): The prefix of pkl files, including the
-            file path and the prefix of filename, e.g., "a/b/prefix". If not
-            specified, a temp file will be created. Defaults to None.
-        submission_prefix (str, optional): The prefix of submission data. If
-            not specified, the submission data will not be generated.
-            Defaults to None.
         load_type (str): Type of loading mode during training.
-
             - 'frame_based': Load all of the instances in the frame.
             - 'mv_image_based': Load all of the instances in the frame and need
                 to convert to the FOV-based data type to support image-based
                 detector.
             - 'fov_image_based': Only load the instances inside the default cam
                 and need to convert to the FOV-based data type to support image-
                 based detector.
-        default_cam_key (str): The default camera for lidar to camera
-            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
-            Defaults to 'CAM_FRONT'.
-        use_pred_sample_idx (bool): In formating results, use the sample index
-            from the prediction or from the load annotations. By default,
-            KITTI: True, Waymo: False, Waymo has a conversion process, which
-            needs to use the sample idx from load annotation.
-            Defaults to False.
-        collect_device (str): Device name used for collecting results from
-            different ranks during distributed training. Must be 'cpu' or
-            'gpu'. Defaults to 'cpu'.
-        backend_args (dict, optional): Arguments to instantiate the
-            corresponding backend. Defaults to None.
-        idx2metainfo (str, optional): The file path of the metainfo in waymo.
-            It stores the mapping from sample_idx to metainfo. The metainfo
-            must contain the keys: 'idx2contextname' and 'idx2timestamp'.
+        result_prefix (str, optional): The prefix of result '*.bin' file,
+            including the file path and the prefix of filename, e.g.,
+            "a/b/prefix". If not specified, a temp file will be created.
             Defaults to None.
+        format_only (bool): Format the output results without performing
+            evaluation. It is useful when you want to format the result to a
+            specific format and submit it to the test server.
+            Defaults to False.
""" num_cams = 5 + default_prefix = 'Waymo metric' def __init__(self, - ann_file: str, waymo_bin_file: str, - data_root: str, - split: str = 'training', metric: Union[str, List[str]] = 'mAP', - pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5], - convert_kitti_format: bool = True, - prefix: Optional[str] = None, - format_only: bool = False, - pklfile_prefix: Optional[str] = None, - submission_prefix: Optional[str] = None, load_type: str = 'frame_based', - default_cam_key: str = 'CAM_FRONT', - use_pred_sample_idx: bool = False, - collect_device: str = 'cpu', - backend_args: Optional[dict] = None, - idx2metainfo: Optional[str] = None) -> None: + result_prefix: Optional[str] = None, + format_only: bool = False, + **kwargs) -> None: + super().__init__(**kwargs) self.waymo_bin_file = waymo_bin_file - self.data_root = data_root - self.split = split + self.metrics = metric if isinstance(metric, list) else [metric] self.load_type = load_type - self.use_pred_sample_idx = use_pred_sample_idx - self.convert_kitti_format = convert_kitti_format - - if idx2metainfo is not None: - self.idx2metainfo = mmengine.load(idx2metainfo) - else: - self.idx2metainfo = None - - super(WaymoMetric, self).__init__( - ann_file=ann_file, - metric=metric, - pcd_limit_range=pcd_limit_range, - prefix=prefix, - pklfile_prefix=pklfile_prefix, - submission_prefix=submission_prefix, - default_cam_key=default_cam_key, - collect_device=collect_device, - backend_args=backend_args) self.format_only = format_only + self.result_prefix = result_prefix if self.format_only: - assert pklfile_prefix is not None, 'pklfile_prefix must be not ' + assert result_prefix is not None, 'result_prefix must be not ' 'None when format_only is True, otherwise the result files will ' 'be saved to a temp directory which will be cleaned up at the end.' - self.default_prefix = 'Waymo metric' + def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: + """Process one batch of data samples and predictions. + + The processed results should be stored in ``self.results``, which will + be used to compute the metrics when all batches have been processed. + + Args: + data_batch (dict): A batch of data from the dataloader. + data_samples (Sequence[dict]): A batch of outputs from the model. + """ + + for data_sample in data_samples: + result = dict() + bboxes_3d = data_sample['pred_instances_3d']['bboxes_3d'] + bboxes_3d.limit_yaw(offset=0.5, period=np.pi * 2) + scores_3d = data_sample['pred_instances_3d']['scores_3d'] + labels_3d = data_sample['pred_instances_3d']['labels_3d'] + # TODO: check lidar post-processing + if isinstance(bboxes_3d, CameraInstance3DBoxes): + box_corners = bboxes_3d.corners + cam2img = box_corners.new_tensor( + np.array(data_sample['cam2img'])) + box_corners_in_image = points_cam2img(box_corners, cam2img) + # box_corners_in_image: [N, 8, 2] + minxy = torch.min(box_corners_in_image, dim=1)[0] + maxxy = torch.max(box_corners_in_image, dim=1)[0] + # check minxy & maxxy + # if the projected 2d bbox has intersection + # with the image, we keep it, otherwise, we omit it. 
+                img_shape = data_sample['img_shape']
+                valid_inds = ((minxy[:, 0] < img_shape[1]) &
+                              (minxy[:, 1] < img_shape[0]) & (maxxy[:, 0] > 0)
+                              & (maxxy[:, 1] > 0))
+
+                if valid_inds.sum() > 0:
+                    lidar2cam = data_sample['lidar2cam']
+                    bboxes_3d = bboxes_3d.convert_to(
+                        Box3DMode.LIDAR,
+                        np.linalg.inv(lidar2cam),
+                        correct_yaw=True)
+                    bboxes_3d = bboxes_3d[valid_inds]
+                    scores_3d = scores_3d[valid_inds]
+                    labels_3d = labels_3d[valid_inds]
+                else:
+                    bboxes_3d = LiDARInstance3DBoxes(torch.zeros([0, 7]))
+                    scores_3d = torch.zeros([0])
+                    labels_3d = torch.zeros([0])
+            result['bboxes_3d'] = bboxes_3d.tensor.cpu().numpy()
+            result['scores_3d'] = scores_3d.cpu().numpy()
+            result['labels_3d'] = labels_3d.cpu().numpy()
+            result['sample_idx'] = data_sample['sample_idx']
+            result['context_name'] = data_sample['context_name']
+            result['timestamp'] = data_sample['timestamp']
+            self.results.append(result)

     def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
         """Compute the metrics from processed results.
@@ -137,80 +129,49 @@ def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
         logger: MMLogger = MMLogger.get_current_instance()
         self.classes = self.dataset_meta['classes']

-        # load annotations
-        self.data_infos = load(self.ann_file)['data_list']
-        assert len(results) == len(self.data_infos), \
-            'invalid list length of network outputs'
         # different from kitti, waymo do not need to convert the ann file
         # handle the mv_image_based load_mode
         if self.load_type == 'mv_image_based':
-            new_data_infos = []
-            for info in self.data_infos:
-                height = info['images'][self.default_cam_key]['height']
-                width = info['images'][self.default_cam_key]['width']
-                for (cam_key, img_info) in info['images'].items():
-                    camera_info = dict()
-                    camera_info['images'] = dict()
-                    camera_info['images'][cam_key] = img_info
-                    # TODO remove the check by updating the data info;
-                    if 'height' not in img_info:
-                        img_info['height'] = height
-                        img_info['width'] = width
-                    if 'cam_instances' in info \
-                            and cam_key in info['cam_instances']:
-                        camera_info['instances'] = info['cam_instances'][
-                            cam_key]
-                    else:
-                        camera_info['instances'] = []
-                    camera_info['ego2global'] = info['ego2global']
-                    if 'image_sweeps' in info:
-                        camera_info['image_sweeps'] = info['image_sweeps']
-
-                    # TODO check if need to modify the sample idx
-                    # TODO check when will use it except for evaluation.
-                    camera_info['sample_idx'] = info['sample_idx']
-                    new_data_infos.append(camera_info)
-            self.data_infos = new_data_infos
-
-        if self.pklfile_prefix is None:
+            assert len(results) % 5 == 0, (
+                'The multi-view image-based results must be 5x frame-based.')
+            frame_results = [
+                results[i:i + 5] for i in range(0, len(results), 5)
+            ]
+            results = self.merge_multi_view_boxes(frame_results)
+
+        if self.result_prefix is None:
             eval_tmp_dir = tempfile.TemporaryDirectory()
-            pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')
+            result_prefix = osp.join(eval_tmp_dir.name, 'results')
         else:
             eval_tmp_dir = None
-            pklfile_prefix = self.pklfile_prefix
+            result_prefix = self.result_prefix

-        result_dict, tmp_dir = self.format_results(
-            results,
-            pklfile_prefix=pklfile_prefix,
-            submission_prefix=self.submission_prefix,
-            classes=self.classes)
+        self.format_results(results, result_prefix=result_prefix)

         metric_dict = {}

         if self.format_only:
             logger.info('results are saved in '
-                        f'{osp.dirname(self.pklfile_prefix)}')
+                        f'{osp.dirname(self.result_prefix)}')
             return metric_dict

         for metric in self.metrics:
             ap_dict = self.waymo_evaluate(
-                pklfile_prefix, metric=metric, logger=logger)
+                result_prefix, metric=metric, logger=logger)
             metric_dict.update(ap_dict)
         if eval_tmp_dir is not None:
             eval_tmp_dir.cleanup()

-        if tmp_dir is not None:
-            tmp_dir.cleanup()
-
         return metric_dict

     def waymo_evaluate(self,
-                       pklfile_prefix: str,
+                       result_prefix: str,
                        metric: Optional[str] = None,
                        logger: Optional[MMLogger] = None) -> Dict[str, float]:
         """Evaluation in Waymo protocol.

         Args:
-            pklfile_prefix (str): The location that stored the prediction
+            result_prefix (str): The location that stored the prediction
                 results.
             metric (str, optional): Metric to be evaluated. Defaults to None.
             logger (MMLogger, optional): Logger used for printing related
@@ -224,7 +185,7 @@ def waymo_evaluate,
         if metric == 'mAP':
             eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
-                f'compute_detection_metrics_main {pklfile_prefix}.bin ' + \
+                f'compute_detection_metrics_main {result_prefix}.bin ' + \
                 f'{self.waymo_bin_file}'
             print(eval_str)
             ret_bytes = subprocess.check_output(eval_str, shell=True)
@@ -275,7 +236,7 @@ def waymo_evaluate,
                             ap_dict['Cyclist/L2 mAPH']) / 3
         elif metric == 'LET_mAP':
             eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
-                f'compute_detection_let_metrics_main {pklfile_prefix}.bin ' + \
+                f'compute_detection_let_metrics_main {result_prefix}.bin ' + \
                 f'{self.waymo_bin_file}'

             print(eval_str)
@@ -325,76 +286,26 @@ def waymo_evaluate,
     def format_results(
         self,
         results: List[dict],
-        pklfile_prefix: Optional[str] = None,
-        submission_prefix: Optional[str] = None,
-        classes: Optional[List[str]] = None
+        result_prefix: Optional[str] = None
     ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
         """Format the results to bin file.

         Args:
             results (List[dict]): Testing results of the dataset.
-            pklfile_prefix (str, optional): The prefix of pkl files. It
+            result_prefix (str, optional): The prefix of result file. It
                 includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
                 Defaults to None.
-            submission_prefix (str, optional): The prefix of submitted files.
-                It includes the file path and the prefix of filename, e.g.,
-                "a/b/prefix". If not specified, a temp file will be created.
-                Defaults to None.
-            classes (List[str], optional): A list of class name.
-                Defaults to None.
-
-        Returns:
-            tuple: (result_dict, tmp_dir), result_dict is a dict containing the
-                formatted result, tmp_dir is the temporal directory created for
-                saving json files when jsonfile_prefix is not specified.
""" - waymo_save_tmp_dir = tempfile.TemporaryDirectory() - waymo_results_save_dir = waymo_save_tmp_dir.name - waymo_results_final_path = f'{pklfile_prefix}.bin' - - if self.convert_kitti_format: - results_kitti_format, tmp_dir = super().format_results( - results, pklfile_prefix, submission_prefix, classes) - final_results = results_kitti_format['pred_instances_3d'] - else: - final_results = results - for i, res in enumerate(final_results): - # Actually, `sample_idx` here is the filename without suffix. - # It's for identitying the sample in formating. - res['sample_idx'] = self.data_infos[i]['sample_idx'] - res['pred_instances_3d']['bboxes_3d'].limit_yaw( - offset=0.5, period=np.pi * 2) - - waymo_root = self.data_root - if self.split == 'training': - waymo_tfrecords_dir = osp.join(waymo_root, 'validation') - prefix = '1' - elif self.split == 'testing': - waymo_tfrecords_dir = osp.join(waymo_root, 'testing') - prefix = '2' - else: - raise ValueError('Not supported split value.') + waymo_results_final_path = f'{result_prefix}.bin' from ..functional.waymo_utils.prediction_to_waymo import \ Prediction2Waymo - converter = Prediction2Waymo( - final_results, - waymo_tfrecords_dir, - waymo_results_save_dir, - waymo_results_final_path, - prefix, - classes, - backend_args=self.backend_args, - from_kitti_format=self.convert_kitti_format, - idx2metainfo=self.idx2metainfo) + converter = Prediction2Waymo(results, waymo_results_final_path, + self.classes) converter.convert() - waymo_save_tmp_dir.cleanup() - - return final_results, waymo_save_tmp_dir - def merge_multi_view_boxes(self, box_dict_per_frame: List[dict], - cam0_info: dict) -> dict: + def merge_multi_view_boxes(self, frame_results) -> dict: """Merge bounding boxes predicted from multi-view images. Args: @@ -403,308 +314,49 @@ def merge_multi_view_boxes(self, box_dict_per_frame: List[dict], cam0_info (dict): Store the sample idx for the given frame. Returns: - dict: Merged results. 
- """ - box_dict = dict() - # convert list[dict] to dict[list] - for key in box_dict_per_frame[0].keys(): - box_dict[key] = list() - for cam_idx in range(self.num_cams): - box_dict[key].append(box_dict_per_frame[cam_idx][key]) - # merge each elements - box_dict['sample_idx'] = cam0_info['image_id'] - for key in ['bbox', 'box3d_lidar', 'scores', 'label_preds']: - box_dict[key] = np.concatenate(box_dict[key]) - - # apply nms to box3d_lidar (box3d_camera are in different systems) - # TODO: move this global setting into config - nms_cfg = dict( - use_rotate_nms=True, - nms_across_levels=False, - nms_pre=500, - nms_thr=0.05, - score_thr=0.001, - min_bbox_size=0, - max_per_frame=100) - nms_cfg = Config(nms_cfg) - lidar_boxes3d = LiDARInstance3DBoxes( - torch.from_numpy(box_dict['box3d_lidar']).cuda()) - scores = torch.from_numpy(box_dict['scores']).cuda() - labels = torch.from_numpy(box_dict['label_preds']).long().cuda() - nms_scores = scores.new_zeros(scores.shape[0], len(self.classes) + 1) - indices = labels.new_tensor(list(range(scores.shape[0]))) - nms_scores[indices, labels] = scores - lidar_boxes3d_for_nms = xywhr2xyxyr(lidar_boxes3d.bev) - boxes3d = lidar_boxes3d.tensor - # generate attr scores from attr labels - boxes3d, scores, labels = box3d_multiclass_nms( - boxes3d, lidar_boxes3d_for_nms, nms_scores, nms_cfg.score_thr, - nms_cfg.max_per_frame, nms_cfg) - lidar_boxes3d = LiDARInstance3DBoxes(boxes3d) - det = bbox3d2result(lidar_boxes3d, scores, labels) - box_preds_lidar = det['bboxes_3d'] - scores = det['scores_3d'] - labels = det['labels_3d'] - # box_preds_camera is in the cam0 system - lidar2cam = cam0_info['images'][self.default_cam_key]['lidar2img'] - lidar2cam = np.array(lidar2cam).astype(np.float32) - box_preds_camera = box_preds_lidar.convert_to( - Box3DMode.CAM, lidar2cam, correct_yaw=True) - # Note: bbox is meaningless in final evaluation, set to 0 - merged_box_dict = dict( - bbox=np.zeros([box_preds_lidar.tensor.shape[0], 4]), - box3d_camera=box_preds_camera.numpy(), - box3d_lidar=box_preds_lidar.numpy(), - scores=scores.numpy(), - label_preds=labels.numpy(), - sample_idx=box_dict['sample_idx'], - ) - return merged_box_dict - - def bbox2result_kitti( - self, - net_outputs: List[dict], - sample_idx_list: List[int], - class_names: List[str], - pklfile_prefix: Optional[str] = None, - submission_prefix: Optional[str] = None) -> List[dict]: - """Convert 3D detection results to kitti format for evaluation and test - submission. - - Args: - net_outputs (List[dict]): List of dict storing the inferenced - bounding boxes and scores. - sample_idx_list (List[int]): List of input sample idx. - class_names (List[str]): A list of class names. - pklfile_prefix (str, optional): The prefix of pkl file. - Defaults to None. - submission_prefix (str, optional): The prefix of submission file. - Defaults to None. - - Returns: - List[dict]: A list of dictionaries with the kitti format. + Dict: Merged results. """ - if submission_prefix is not None: - mmengine.mkdir_or_exist(submission_prefix) - - det_annos = [] - print('\nConverting prediction to KITTI format') - for idx, pred_dicts in enumerate( - mmengine.track_iter_progress(net_outputs)): - sample_idx = sample_idx_list[idx] - info = self.data_infos[sample_idx] - - if self.load_type == 'mv_image_based': - if idx % self.num_cams == 0: - box_dict_per_frame = [] - cam0_key = list(info['images'].keys())[0] - cam0_info = info - # Here in mono3d, we use the 'CAM_FRONT' "the first - # index in the camera" as the default image shape. 
- # If you want to another camera, please modify it. - image_shape = (info['images'][cam0_key]['height'], - info['images'][cam0_key]['width']) - box_dict = self.convert_valid_bboxes(pred_dicts, info) - else: - box_dict = self.convert_valid_bboxes(pred_dicts, info) - # Here default used 'CAM_FRONT' to compute metric. - # If you want to use another camera, please modify it. - image_shape = (info['images'][self.default_cam_key]['height'], - info['images'][self.default_cam_key]['width']) - if self.load_type == 'mv_image_based': - box_dict_per_frame.append(box_dict) - if (idx + 1) % self.num_cams != 0: - continue - box_dict = self.merge_multi_view_boxes(box_dict_per_frame, - cam0_info) - - anno = { - 'name': [], - 'truncated': [], - 'occluded': [], - 'alpha': [], - 'bbox': [], - 'dimensions': [], - 'location': [], - 'rotation_y': [], - 'score': [] - } - if len(box_dict['bbox']) > 0: - box_2d_preds = box_dict['bbox'] - box_preds = box_dict['box3d_camera'] - scores = box_dict['scores'] - box_preds_lidar = box_dict['box3d_lidar'] - label_preds = box_dict['label_preds'] - - for box, box_lidar, bbox, score, label in zip( - box_preds, box_preds_lidar, box_2d_preds, scores, - label_preds): - bbox[2:] = np.minimum(bbox[2:], image_shape[::-1]) - bbox[:2] = np.maximum(bbox[:2], [0, 0]) - anno['name'].append(class_names[int(label)]) - anno['truncated'].append(0.0) - anno['occluded'].append(0) - anno['alpha'].append( - -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6]) - anno['bbox'].append(bbox) - anno['dimensions'].append(box[3:6]) - anno['location'].append(box[:3]) - anno['rotation_y'].append(box[6]) - anno['score'].append(score) - - anno = {k: np.stack(v) for k, v in anno.items()} - else: - anno = { - 'name': np.array([]), - 'truncated': np.array([]), - 'occluded': np.array([]), - 'alpha': np.array([]), - 'bbox': np.zeros([0, 4]), - 'dimensions': np.zeros([0, 3]), - 'location': np.zeros([0, 3]), - 'rotation_y': np.array([]), - 'score': np.array([]), - } - - if submission_prefix is not None: - curr_file = f'{submission_prefix}/{sample_idx:06d}.txt' - with open(curr_file, 'w') as f: - bbox = anno['bbox'] - loc = anno['location'] - dims = anno['dimensions'] # lhw -> hwl - - for idx in range(len(bbox)): - print( - '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} ' - '{:.4f} {:.4f} {:.4f} ' - '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format( - anno['name'][idx], anno['alpha'][idx], - bbox[idx][0], bbox[idx][1], bbox[idx][2], - bbox[idx][3], dims[idx][1], dims[idx][2], - dims[idx][0], loc[idx][0], loc[idx][1], - loc[idx][2], anno['rotation_y'][idx], - anno['score'][idx]), - file=f) - if self.use_pred_sample_idx: - save_sample_idx = sample_idx - else: - # use the sample idx in the info file - # In waymo validation sample_idx in prediction is 000xxx - # but in info file it is 1000xxx - save_sample_idx = box_dict['sample_idx'] - anno['sample_idx'] = np.array( - [save_sample_idx] * len(anno['score']), dtype=np.int64) - - det_annos.append(anno) - - if pklfile_prefix is not None: - if not pklfile_prefix.endswith(('.pkl', '.pickle')): - out = f'{pklfile_prefix}.pkl' - else: - out = pklfile_prefix - mmengine.dump(det_annos, out) - print(f'Result is saved to {out}.') - - return det_annos - - def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict: - """Convert the predicted boxes into valid ones. Should handle the - load_model (frame_based, mv_image_based, fov_image_based), separately. - - Args: - box_dict (dict): Box dictionaries to be converted. - - - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes. 
- - scores_3d (Tensor): Scores of boxes. - - labels_3d (Tensor): Class labels of boxes. - info (dict): Data info. - - Returns: - dict: Valid predicted boxes. - - - bbox (np.ndarray): 2D bounding boxes. - - box3d_camera (np.ndarray): 3D bounding boxes in camera - coordinate. - - box3d_lidar (np.ndarray): 3D bounding boxes in LiDAR coordinate. - - scores (np.ndarray): Scores of boxes. - - label_preds (np.ndarray): Class label predictions. - - sample_idx (int): Sample index. - """ - # TODO: refactor this function - box_preds = box_dict['bboxes_3d'] - scores = box_dict['scores_3d'] - labels = box_dict['labels_3d'] - sample_idx = info['sample_idx'] - box_preds.limit_yaw(offset=0.5, period=np.pi * 2) - - if len(box_preds) == 0: - return dict( - bbox=np.zeros([0, 4]), - box3d_camera=np.zeros([0, 7]), - box3d_lidar=np.zeros([0, 7]), - scores=np.zeros([0]), - label_preds=np.zeros([0, 4]), - sample_idx=sample_idx) - # Here default used 'CAM_FRONT' to compute metric. If you want to - # use another camera, please modify it. - if self.load_type in ['frame_based', 'fov_image_based']: - cam_key = self.default_cam_key - elif self.load_type == 'mv_image_based': - cam_key = list(info['images'].keys())[0] - else: - raise NotImplementedError - - lidar2cam = np.array(info['images'][cam_key]['lidar2cam']).astype( - np.float32) - P2 = np.array(info['images'][cam_key]['cam2img']).astype(np.float32) - img_shape = (info['images'][cam_key]['height'], - info['images'][cam_key]['width']) - P2 = box_preds.tensor.new_tensor(P2) - - if isinstance(box_preds, LiDARInstance3DBoxes): - box_preds_camera = box_preds.convert_to(Box3DMode.CAM, lidar2cam) - box_preds_lidar = box_preds - elif isinstance(box_preds, CameraInstance3DBoxes): - box_preds_camera = box_preds - box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR, - np.linalg.inv(lidar2cam)) - - box_corners = box_preds_camera.corners - box_corners_in_image = points_cam2img(box_corners, P2) - # box_corners_in_image: [N, 8, 2] - minxy = torch.min(box_corners_in_image, dim=1)[0] - maxxy = torch.max(box_corners_in_image, dim=1)[0] - box_2d_preds = torch.cat([minxy, maxxy], dim=1) - # Post-processing - # check box_preds_camera - image_shape = box_preds.tensor.new_tensor(img_shape) - valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) & - (box_2d_preds[:, 1] < image_shape[0]) & - (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0)) - # check box_preds_lidar - if self.load_type in ['frame_based']: - limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range) - valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) & - (box_preds_lidar.center < limit_range[3:])) - valid_inds = valid_pcd_inds.all(-1) - elif self.load_type in ['mv_image_based', 'fov_image_based']: - valid_inds = valid_cam_inds - - if valid_inds.sum() > 0: - return dict( - bbox=box_2d_preds[valid_inds, :].numpy(), - pred_box_type_3d=type(box_preds), - box3d_camera=box_preds_camera[valid_inds].numpy(), - box3d_lidar=box_preds_lidar[valid_inds].numpy(), - scores=scores[valid_inds].numpy(), - label_preds=labels[valid_inds].numpy(), - sample_idx=sample_idx) - else: - return dict( - bbox=np.zeros([0, 4]), - pred_box_type_3d=type(box_preds), - box3d_camera=np.zeros([0, 7]), - box3d_lidar=np.zeros([0, 7]), - scores=np.zeros([0]), - label_preds=np.zeros([0]), - sample_idx=sample_idx) + merged_results = [] + for frame_result in frame_results: + merged_result = dict() + merged_result['sample_idx'] = frame_result[0]['sample_idx'] // 5 + merged_result['context_name'] = frame_result[0]['context_name'] + 
merged_result['timestamp'] = frame_result[0]['timestamp'] + bboxes_3d, scores_3d, labels_3d = [], [], [] + for result in frame_result: + assert result['timestamp'] == merged_result['timestamp'] + bboxes_3d.append(result['bboxes_3d']) + scores_3d.append(result['scores_3d']) + labels_3d.append(result['labels_3d']) + + bboxes_3d = np.concatenate(bboxes_3d) + scores_3d = np.concatenate(scores_3d) + labels_3d = np.concatenate(labels_3d) + nms_cfg = dict( + use_rotate_nms=True, + nms_across_levels=False, + nms_pre=500, + nms_thr=0.05, + score_thr=0.001, + min_bbox_size=0, + max_per_frame=100) + nms_cfg = Config(nms_cfg) + lidar_boxes3d = LiDARInstance3DBoxes( + torch.from_numpy(bboxes_3d).cuda()) + scores = torch.from_numpy(scores_3d).cuda() + labels = torch.from_numpy(labels_3d).long().cuda() + nms_scores = scores.new_zeros(scores.shape[0], + len(self.classes) + 1) + indices = labels.new_tensor(list(range(scores.shape[0]))) + nms_scores[indices, labels] = scores + lidar_boxes3d_for_nms = xywhr2xyxyr(lidar_boxes3d.bev) + boxes3d = lidar_boxes3d.tensor + bboxes_3d, scores_3d, labels_3d = box3d_multiclass_nms( + boxes3d, lidar_boxes3d_for_nms, nms_scores, nms_cfg.score_thr, + nms_cfg.max_per_frame, nms_cfg) + + merged_result['bboxes_3d'] = bboxes_3d.cpu().numpy() + merged_result['scores_3d'] = scores_3d.cpu().numpy() + merged_result['labels_3d'] = labels_3d.cpu().numpy() + merged_results.append(merged_result) + return merged_results diff --git a/projects/CenterFormer/configs/centerformer_voxel01_second-attn_secfpn-attn_4xb4-cyclic-20e_waymoD5-3d-3class.py b/projects/CenterFormer/configs/centerformer_voxel01_second-attn_secfpn-attn_4xb4-cyclic-20e_waymoD5-3d-3class.py index 14bcbb9296..5b207c7992 100644 --- a/projects/CenterFormer/configs/centerformer_voxel01_second-attn_secfpn-attn_4xb4-cyclic-20e_waymoD5-3d-3class.py +++ b/projects/CenterFormer/configs/centerformer_voxel01_second-attn_secfpn-attn_4xb4-cyclic-20e_waymoD5-3d-3class.py @@ -179,7 +179,10 @@ dict( type='PointsRangeFilter', point_cloud_range=point_cloud_range) ]), - dict(type='Pack3DDetInputs', keys=['points']) + dict( + type='Pack3DDetInputs', + keys=['points'], + meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp']) ] dataset_type = 'WaymoDataset' @@ -223,13 +226,7 @@ test_dataloader = val_dataloader val_evaluator = dict( - type='WaymoMetric', - ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', - waymo_bin_file='./data/waymo/waymo_format/gt.bin', - data_root='./data/waymo/waymo_format', - backend_args=backend_args, - convert_kitti_format=False, - idx2metainfo='./data/waymo/waymo_format/idx2metainfo.pkl') + type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin') test_evaluator = val_evaluator vis_backends = [dict(type='LocalVisBackend')] diff --git a/tools/create_data.py b/tools/create_data.py index 34356c2a8f..384fa87127 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -2,6 +2,8 @@ import argparse from os import path as osp +from mmengine import print_log + from tools.dataset_converters import indoor_converter as indoor from tools.dataset_converters import kitti_converter as kitti from tools.dataset_converters import lyft_converter as lyft_converter @@ -171,8 +173,19 @@ def waymo_data_prep(root_path, version, out_dir, workers, - max_sweeps=5): - """Prepare the info file for waymo dataset. + max_sweeps=10, + only_gt_database=False, + skip_image_and_lidar=False, + skip_cam_instances_infos=False): + """Prepare waymo dataset. There are 3 steps as follows: + + Step 1. 
Extract camera images and lidar point clouds from waymo raw
+        data in '*.tfrecord' and save as kitti format.
+    Step 2. Generate waymo train/val/test infos and save as pickle file.
+    Step 3. Generate waymo ground truth database (point clouds within
+        each 3D bounding box) for data augmentation in training.
+    Steps 1 and 2 will be done in Waymo2KITTI, and step 3 will be done in
+    GTDatabaseCreater.

     Args:
         root_path (str): Path of dataset root.
@@ -180,44 +193,55 @@ def waymo_data_prep(root_path,
         out_dir (str): Output directory of the generated info file.
         workers (int): Number of threads to be used.
         max_sweeps (int, optional): Number of input consecutive frames.
-            Default: 5. Here we store pose information of these frames
-            for later use.
+            Defaults to 10. Here we store ego2global information of these
+            frames for later use.
+        only_gt_database (bool, optional): Whether to only generate ground
+            truth database. Defaults to False.
+        skip_image_and_lidar (bool, optional): Whether to skip saving
+            image and lidar. Defaults to False.
+        skip_cam_instances_infos (bool, optional): Whether to skip
+            gathering cam_instances infos in Step 2. Defaults to False.
     """
     from tools.dataset_converters import waymo_converter as waymo

-    splits = [
-        'training', 'validation', 'testing', 'testing_3d_camera_only_detection'
-    ]
-    for i, split in enumerate(splits):
-        load_dir = osp.join(root_path, 'waymo_format', split)
-        if split == 'validation':
-            save_dir = osp.join(out_dir, 'kitti_format', 'training')
-        else:
-            save_dir = osp.join(out_dir, 'kitti_format', split)
-        converter = waymo.Waymo2KITTI(
-            load_dir,
-            save_dir,
-            prefix=str(i),
-            workers=workers,
-            test_mode=(split
-                       in ['testing', 'testing_3d_camera_only_detection']))
-        converter.convert()
-
-    from tools.dataset_converters.waymo_converter import \
-        create_ImageSets_img_ids
-    create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits)
-    # Generate waymo infos
+    if version == 'v1.4':
+        splits = [
+            'training', 'validation', 'testing',
+            'testing_3d_camera_only_detection'
+        ]
+    elif version == 'v1.4-mini':
+        splits = ['training', 'validation']
+    else:
+        raise NotImplementedError(f'Unsupported Waymo version {version}!')
     out_dir = osp.join(out_dir, 'kitti_format')
-    kitti.create_waymo_info_file(
-        out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers)
-    info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl')
-    info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl')
-    info_trainval_path = osp.join(out_dir, f'{info_prefix}_infos_trainval.pkl')
-    info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl')
-    update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_train_path)
-    update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_val_path)
-    update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_trainval_path)
-    update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_test_path)
+
+    if not only_gt_database:
+        for i, split in enumerate(splits):
+            load_dir = osp.join(root_path, 'waymo_format', split)
+            if split == 'validation':
+                save_dir = osp.join(out_dir, 'training')
+            else:
+                save_dir = osp.join(out_dir, split)
+            converter = waymo.Waymo2KITTI(
+                load_dir,
+                save_dir,
+                prefix=str(i),
+                workers=workers,
+                test_mode=(split
+                           in ['testing', 'testing_3d_camera_only_detection']),
+                info_prefix=info_prefix,
+                max_sweeps=max_sweeps,
+                split=split,
+                save_image_and_lidar=not skip_image_and_lidar,
+                save_cam_instances=not skip_cam_instances_infos)
+            converter.convert()
+            if split == 'validation':
 
 def semantickitti_data_prep(info_prefix, out_dir):
     """Prepare the info file for SemanticKITTI dataset.
@@ -274,12 +300,23 @@ def semantickitti_data_prep(info_prefix, out_dir):
 parser.add_argument(
     '--only-gt-database',
     action='store_true',
-    help='Whether to only generate ground truth database.')
+    help='''Whether to only generate ground truth database.
+        Only used when dataset is NuScenes or Waymo!''')
+parser.add_argument(
+    '--skip-cam_instances-infos',
+    action='store_true',
+    help='''Whether to skip gathering cam_instances infos.
+        Only used when dataset is Waymo!''')
+parser.add_argument(
+    '--skip-image-and-lidar',
+    action='store_true',
+    help='''Whether to skip saving image and lidar.
+        Only used when dataset is Waymo!''')
 args = parser.parse_args()
 
 if __name__ == '__main__':
-    from mmdet3d.utils import register_all_modules
-    register_all_modules()
+    from mmengine.registry import init_default_scope
+    init_default_scope('mmdet3d')
 
     if args.dataset == 'kitti':
         if args.only_gt_database:
@@ -334,6 +371,17 @@ def semantickitti_data_prep(info_prefix, out_dir):
             dataset_name='NuScenesDataset',
             out_dir=args.out_dir,
             max_sweeps=args.max_sweeps)
+    elif args.dataset == 'waymo':
+        waymo_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            version=args.version,
+            out_dir=args.out_dir,
+            workers=args.workers,
+            max_sweeps=args.max_sweeps,
+            only_gt_database=args.only_gt_database,
+            skip_image_and_lidar=args.skip_image_and_lidar,
+            skip_cam_instances_infos=args.skip_cam_instances_infos)
     elif args.dataset == 'lyft':
         train_version = f'{args.version}-train'
         lyft_data_prep(
@@ -347,14 +395,6 @@ def semantickitti_data_prep(info_prefix, out_dir):
             info_prefix=args.extra_tag,
             version=test_version,
             max_sweeps=args.max_sweeps)
-    elif args.dataset == 'waymo':
-        waymo_data_prep(
-            root_path=args.root_path,
-            info_prefix=args.extra_tag,
-            version=args.version,
-            out_dir=args.out_dir,
-            workers=args.workers,
-            max_sweeps=args.max_sweeps)
     elif args.dataset == 'scannet':
         scannet_data_prep(
             root_path=args.root_path,
diff --git a/tools/create_data.sh b/tools/create_data.sh
index 9a57852f71..0a1946585d 100755
--- a/tools/create_data.sh
+++ b/tools/create_data.sh
@@ -6,10 +6,11 @@ export PYTHONPATH=`pwd`:$PYTHONPATH
 PARTITION=$1
 JOB_NAME=$2
 DATASET=$3
+WORKERS=$4
 GPUS=${GPUS:-1}
 GPUS_PER_NODE=${GPUS_PER_NODE:-1}
 SRUN_ARGS=${SRUN_ARGS:-""}
-JOB_NAME=create_data
+PY_ARGS=${@:5}
 
 srun -p ${PARTITION} \
     --job-name=${JOB_NAME} \
@@ -21,4 +22,6 @@ srun -p ${PARTITION} \
     python -u tools/create_data.py ${DATASET} \
         --root-path ./data/${DATASET} \
         --out-dir ./data/${DATASET} \
-        --extra-tag ${DATASET}
+        --workers ${WORKERS} \
+        --extra-tag ${DATASET} \
+        ${PY_ARGS}
diff --git a/tools/dataset_converters/create_gt_database.py b/tools/dataset_converters/create_gt_database.py
index ae452eb543..fb84256fd8 100644
--- a/tools/dataset_converters/create_gt_database.py
+++ b/tools/dataset_converters/create_gt_database.py
@@ -7,7 +7,7 @@
 import numpy as np
 from mmcv.ops import roi_align
 from mmdet.evaluation import bbox_overlaps
-from mmengine import track_iter_progress
+from mmengine import print_log, track_iter_progress
 from pycocotools import mask as maskUtils
 from pycocotools.coco import COCO
 
@@ -504,7 +504,9 @@ def create_single(self, input_dict):
         return single_db_infos
 
     def create(self):
-        print(f'Create GT Database of {self.dataset_class_name}')
+        print_log(
+            f'Create GT Database of {self.dataset_class_name}',
+            logger='current')
         dataset_cfg = dict(
             type=self.dataset_class_name,
             data_root=self.data_path,
@@ -610,12 +612,19 @@ def loop_dataset(i):
             input_dict['box_mode_3d'] = self.dataset.box_mode_3d
             return input_dict
 
-        multi_db_infos = mmengine.track_parallel_progress(
-            self.create_single,
-            ((loop_dataset(i)
-              for i in range(len(self.dataset))), len(self.dataset)),
-            self.num_worker)
-        print('Make global unique group id')
+        if self.num_worker == 0:
+            multi_db_infos = mmengine.track_progress(
+                self.create_single,
+                ((loop_dataset(i)
+                  for i in range(len(self.dataset))), len(self.dataset)))
+        else:
+            multi_db_infos = mmengine.track_parallel_progress(
+                self.create_single,
+                ((loop_dataset(i)
+                  for i in range(len(self.dataset))), len(self.dataset)),
+                self.num_worker,
+                chunksize=1000)
+        print_log('Make global unique group id', logger='current')
         group_counter_offset = 0
         all_db_infos = dict()
         for single_db_infos in track_iter_progress(multi_db_infos):
@@ -630,7 +639,8 @@ def loop_dataset(i):
             group_counter_offset += (group_id + 1)
 
         for k, v in all_db_infos.items():
-            print(f'load {len(v)} {k} database infos')
+            print_log(f'load {len(v)} {k} database infos', logger='current')
+        print_log(f'Saving GT database infos into {self.db_info_save_path}')
 
         with open(self.db_info_save_path, 'wb') as f:
             pickle.dump(all_db_infos, f)
diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py
index 87f9c54b54..00eba35daa 100644
--- a/tools/dataset_converters/waymo_converter.py
+++ b/tools/dataset_converters/waymo_converter.py
@@ -9,23 +9,33 @@
     raise ImportError('Please run "pip install waymo-open-dataset-tf-2-6-0" '
                       '>1.4.5 to install the official devkit first.')
 
+import copy
 import os
+import os.path as osp
 from glob import glob
+from io import BytesIO
 from os.path import exists, join
 
 import mmengine
 import numpy as np
 import tensorflow as tf
+from mmengine import print_log
+from nuscenes.utils.geometry_utils import view_points
+from PIL import Image
 from waymo_open_dataset.utils import range_image_utils, transform_utils
 from waymo_open_dataset.utils.frame_utils import \
     parse_range_image_and_camera_projection
 
+from mmdet3d.datasets.convert_utils import post_process_coords
+from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes, points_cam2img
+
 
 class Waymo2KITTI(object):
-    """Waymo to KITTI converter.
+    """Waymo to KITTI converter. There are 2 steps as follows:
 
-    This class serves as the converter to change the waymo raw data to KITTI
-    format.
+    Step 1. Extract camera images and lidar point clouds from waymo raw data
+        in '*.tfrecord' and save them in KITTI format.
+    Step 2. Generate waymo train/val/test infos and save them as pickle
+        files.
 
     Args:
         load_dir (str): Directory to load waymo raw data.
@@ -36,8 +46,16 @@ class Waymo2KITTI(object):
             Defaults to 64.
         test_mode (bool, optional): Whether in the test_mode.
             Defaults to False.
-        save_cam_sync_labels (bool, optional): Whether to save cam sync labels.
-            Defaults to True.
+        save_image_and_lidar (bool, optional): Whether to save image and lidar
+            data. Defaults to True.
+        save_cam_sync_instances (bool, optional): Whether to save cam sync
+            instances. Defaults to True.
+        save_cam_instances (bool, optional): Whether to save cam instances.
+            Defaults to True.
+ info_prefix (str, optional): Prefix of info filename. + Defaults to 'waymo'. + max_sweeps (int, optional): Max length of sweeps. Defaults to 10. + split (str, optional): Split of the data. Defaults to 'training'. """ def __init__(self, @@ -46,18 +64,12 @@ def __init__(self, prefix, workers=64, test_mode=False, - save_cam_sync_labels=True): - self.filter_empty_3dboxes = True - self.filter_no_label_zone_points = True - - self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST'] - - # Only data collected in specific locations will be converted - # If set None, this filter is disabled - # Available options: location_sf (main dataset) - self.selected_waymo_locations = None - self.save_track_id = False - + save_image_and_lidar=True, + save_cam_sync_instances=True, + save_cam_instances=True, + info_prefix='waymo', + max_sweeps=10, + split='training'): # turn on eager execution for older tensorflow versions if int(tf.__version__.split('.')[0]) < 2: tf.enable_eager_execution() @@ -74,12 +86,21 @@ def __init__(self, self.type_list = [ 'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST' ] - self.waymo_to_kitti_class_map = { - 'UNKNOWN': 'DontCare', - 'PEDESTRIAN': 'Pedestrian', - 'VEHICLE': 'Car', - 'CYCLIST': 'Cyclist', - 'SIGN': 'Sign' # not in kitti + + # MMDetection3D unified camera keys & class names + self.camera_types = [ + 'CAM_FRONT', + 'CAM_FRONT_LEFT', + 'CAM_FRONT_RIGHT', + 'CAM_SIDE_LEFT', + 'CAM_SIDE_RIGHT', + ] + self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST'] + self.info_map = { + 'training': '_infos_train.pkl', + 'validation': '_infos_val.pkl', + 'testing': '_infos_test.pkl', + 'testing_3d_camera_only_detection': '_infos_test_cam_only.pkl' } self.load_dir = load_dir @@ -87,61 +108,86 @@ def __init__(self, self.prefix = prefix self.workers = int(workers) self.test_mode = test_mode - self.save_cam_sync_labels = save_cam_sync_labels + self.save_image_and_lidar = save_image_and_lidar + self.save_cam_sync_instances = save_cam_sync_instances + self.save_cam_instances = save_cam_instances + self.info_prefix = info_prefix + self.max_sweeps = max_sweeps + self.split = split + + # TODO: Discuss filter_empty_3dboxes and filter_no_label_zone_points + self.filter_empty_3dboxes = True + self.filter_no_label_zone_points = True + self.save_track_id = False self.tfrecord_pathnames = sorted( glob(join(self.load_dir, '*.tfrecord'))) - self.label_save_dir = f'{self.save_dir}/label_' - self.label_all_save_dir = f'{self.save_dir}/label_all' self.image_save_dir = f'{self.save_dir}/image_' - self.calib_save_dir = f'{self.save_dir}/calib' self.point_cloud_save_dir = f'{self.save_dir}/velodyne' - self.pose_save_dir = f'{self.save_dir}/pose' - self.timestamp_save_dir = f'{self.save_dir}/timestamp' - if self.save_cam_sync_labels: - self.cam_sync_label_save_dir = f'{self.save_dir}/cam_sync_label_' - self.cam_sync_label_all_save_dir = \ - f'{self.save_dir}/cam_sync_label_all' - self.create_folder() + # Create folder for saving KITTI format camera images and + # lidar point clouds. 
+        if 'testing_3d_camera_only_detection' not in self.load_dir:
+            mmengine.mkdir_or_exist(self.point_cloud_save_dir)
+        for i in range(5):
+            mmengine.mkdir_or_exist(f'{self.image_save_dir}{str(i)}')
 
     def convert(self):
         """Convert action."""
-        print('Start converting ...')
-        mmengine.track_parallel_progress(self.convert_one, range(len(self)),
-                                         self.workers)
-        print('\nFinished ...')
+        print_log(f'Start converting {self.split} dataset', logger='current')
+        if self.workers == 0:
+            data_infos = mmengine.track_progress(self.convert_one,
+                                                 range(len(self)))
+        else:
+            data_infos = mmengine.track_parallel_progress(
+                self.convert_one, range(len(self)), self.workers)
+        data_list = []
+        for data_info in data_infos:
+            data_list.extend(data_info)
+        metainfo = dict()
+        metainfo['dataset'] = 'waymo'
+        metainfo['version'] = '1.4'
+        metainfo['info_version'] = '1.1'
+        waymo_infos = dict(data_list=data_list, metainfo=metainfo)
+        filenames = osp.join(
+            osp.dirname(self.save_dir),
+            f'{self.info_prefix + self.info_map[self.split]}')
+        print_log(f'Saving {self.split} dataset infos into {filenames}')
+        mmengine.dump(waymo_infos, filenames)
 
     def convert_one(self, file_idx):
-        """Convert action for single file.
+        """Convert one '*.tfrecord' file to KITTI format. Each file stores
+        all the frames (about 200 frames) in the current scene. We treat
+        each frame as a sample, save its images and point cloud in KITTI
+        format, and then create infos for all frames.
 
         Args:
             file_idx (int): Index of the file to be converted.
+
+        Returns:
+            file_infos (list): Waymo infos for all frames in the current
+                file.
         """
         pathname = self.tfrecord_pathnames[file_idx]
         dataset = tf.data.TFRecordDataset(pathname, compression_type='')
 
+        # NOTE: file_infos is not shared between processes, it only stores
+        # frame infos within the current file.
+        file_infos = []
         for frame_idx, data in enumerate(dataset):
             frame = dataset_pb2.Frame()
             frame.ParseFromString(bytearray(data.numpy()))
-            if (self.selected_waymo_locations is not None
-                    and frame.context.stats.location
-                    not in self.selected_waymo_locations):
-                continue
 
-            self.save_image(frame, file_idx, frame_idx)
-            self.save_calib(frame, file_idx, frame_idx)
-            self.save_lidar(frame, file_idx, frame_idx)
-            self.save_pose(frame, file_idx, frame_idx)
-            self.save_timestamp(frame, file_idx, frame_idx)
+            # Step 1.
+            if self.save_image_and_lidar:
+                self.save_image(frame, file_idx, frame_idx)
+                self.save_lidar(frame, file_idx, frame_idx)
 
-            if not self.test_mode:
-                # TODO save the depth image for waymo challenge solution.
-                self.save_label(frame, file_idx, frame_idx)
-                if self.save_cam_sync_labels:
-                    self.save_label(frame, file_idx, frame_idx, cam_sync=True)
+            # Step 2.
+            # TODO save the depth image for waymo challenge solution.
+            self.create_waymo_info_file(frame, file_idx, frame_idx,
+                                        file_infos)
+        return file_infos
 
     def __len__(self):
         """Length of the filename list."""
@@ -162,62 +208,6 @@ def save_image(self, frame, file_idx, frame_idx):
         with open(img_path, 'wb') as fp:
             fp.write(img.image)
 
-    def save_calib(self, frame, file_idx, frame_idx):
-        """Parse and save the calibration data.
-
-        Args:
-            frame (:obj:`Frame`): Open dataset frame proto.
-            file_idx (int): Current file index.
-            frame_idx (int): Current frame index.
- """ - # waymo front camera to kitti reference camera - T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0], - [1.0, 0.0, 0.0]]) - camera_calibs = [] - R0_rect = [f'{i:e}' for i in np.eye(3).flatten()] - Tr_velo_to_cams = [] - calib_context = '' - - for camera in frame.context.camera_calibrations: - # extrinsic parameters - T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape( - 4, 4) - T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle) - Tr_velo_to_cam = \ - self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam - if camera.name == 1: # FRONT = 1, see dataset.proto for details - self.T_velo_to_front_cam = Tr_velo_to_cam.copy() - Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, )) - Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam]) - - # intrinsic parameters - camera_calib = np.zeros((3, 4)) - camera_calib[0, 0] = camera.intrinsic[0] - camera_calib[1, 1] = camera.intrinsic[1] - camera_calib[0, 2] = camera.intrinsic[2] - camera_calib[1, 2] = camera.intrinsic[3] - camera_calib[2, 2] = 1 - camera_calib = list(camera_calib.reshape(12)) - camera_calib = [f'{i:e}' for i in camera_calib] - camera_calibs.append(camera_calib) - - # all camera ids are saved as id-1 in the result because - # camera 0 is unknown in the proto - for i in range(5): - calib_context += 'P' + str(i) + ': ' + \ - ' '.join(camera_calibs[i]) + '\n' - calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\n' - for i in range(5): - calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \ - ' '.join(Tr_velo_to_cams[i]) + '\n' - - with open( - f'{self.calib_save_dir}/{self.prefix}' + - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', - 'w+') as fp_calib: - fp_calib.write(calib_context) - fp_calib.close() - def save_lidar(self, frame, file_idx, frame_idx): """Parse and save the lidar data in psd format. @@ -275,194 +265,6 @@ def save_lidar(self, frame, file_idx, frame_idx): f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin' point_cloud.astype(np.float32).tofile(pc_path) - def save_label(self, frame, file_idx, frame_idx, cam_sync=False): - """Parse and save the label data in txt format. - The relation between waymo and kitti coordinates is noteworthy: - 1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti) - 2. x-y-z: front-left-up (waymo) -> right-down-front(kitti) - 3. bbox origin at volumetric center (waymo) -> bottom center (kitti) - 4. rotation: +x around y-axis (kitti) -> +x around z-axis (waymo) - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. - cam_sync (bool, optional): Whether to save the cam sync labels. - Defaults to False. 
- """ - label_all_path = f'{self.label_all_save_dir}/{self.prefix}' + \ - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt' - if cam_sync: - label_all_path = label_all_path.replace('label_', - 'cam_sync_label_') - fp_label_all = open(label_all_path, 'w+') - id_to_bbox = dict() - id_to_name = dict() - for labels in frame.projected_lidar_labels: - name = labels.name - for label in labels.labels: - # TODO: need a workaround as bbox may not belong to front cam - bbox = [ - label.box.center_x - label.box.length / 2, - label.box.center_y - label.box.width / 2, - label.box.center_x + label.box.length / 2, - label.box.center_y + label.box.width / 2 - ] - id_to_bbox[label.id] = bbox - id_to_name[label.id] = name - 1 - - for obj in frame.laser_labels: - bounding_box = None - name = None - id = obj.id - for proj_cam in self.cam_list: - if id + proj_cam in id_to_bbox: - bounding_box = id_to_bbox.get(id + proj_cam) - name = str(id_to_name.get(id + proj_cam)) - break - - # NOTE: the 2D labels do not have strict correspondence with - # the projected 2D lidar labels - # e.g.: the projected 2D labels can be in camera 2 - # while the most_visible_camera can have id 4 - if cam_sync: - if obj.most_visible_camera_name: - name = str( - self.cam_list.index( - f'_{obj.most_visible_camera_name}')) - box3d = obj.camera_synced_box - else: - continue - else: - box3d = obj.box - - if bounding_box is None or name is None: - name = '0' - bounding_box = (0, 0, 0, 0) - - my_type = self.type_list[obj.type] - - if my_type not in self.selected_waymo_classes: - continue - - if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1: - continue - - my_type = self.waymo_to_kitti_class_map[my_type] - - height = box3d.height - width = box3d.width - length = box3d.length - - x = box3d.center_x - y = box3d.center_y - z = box3d.center_z - height / 2 - - # project bounding box to the virtual reference frame - pt_ref = self.T_velo_to_front_cam @ \ - np.array([x, y, z, 1]).reshape((4, 1)) - x, y, z, _ = pt_ref.flatten().tolist() - - rotation_y = -box3d.heading - np.pi / 2 - track_id = obj.id - - # not available - truncated = 0 - occluded = 0 - alpha = -10 - - line = my_type + \ - ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format( - round(truncated, 2), occluded, round(alpha, 2), - round(bounding_box[0], 2), round(bounding_box[1], 2), - round(bounding_box[2], 2), round(bounding_box[3], 2), - round(height, 2), round(width, 2), round(length, 2), - round(x, 2), round(y, 2), round(z, 2), - round(rotation_y, 2)) - - if self.save_track_id: - line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n' - else: - line_all = line[:-1] + ' ' + name + '\n' - - label_path = f'{self.label_save_dir}{name}/{self.prefix}' + \ - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt' - if cam_sync: - label_path = label_path.replace('label_', 'cam_sync_label_') - fp_label = open(label_path, 'a') - fp_label.write(line) - fp_label.close() - - fp_label_all.write(line_all) - - fp_label_all.close() - - def save_pose(self, frame, file_idx, frame_idx): - """Parse and save the pose data. - - Note that SDC's own pose is not included in the regular training - of KITTI dataset. KITTI raw dataset contains ego motion files - but are not often used. Pose is important for algorithms that - take advantage of the temporal information. - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. 
- """ - pose = np.array(frame.pose.transform).reshape(4, 4) - np.savetxt( - join(f'{self.pose_save_dir}/{self.prefix}' + - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'), - pose) - - def save_timestamp(self, frame, file_idx, frame_idx): - """Save the timestamp data in a separate file instead of the - pointcloud. - - Note that SDC's own pose is not included in the regular training - of KITTI dataset. KITTI raw dataset contains ego motion files - but are not often used. Pose is important for algorithms that - take advantage of the temporal information. - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. - """ - with open( - join(f'{self.timestamp_save_dir}/{self.prefix}' + - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'), - 'w') as f: - f.write(str(frame.timestamp_micros)) - - def create_folder(self): - """Create folder for data preprocessing.""" - if not self.test_mode: - dir_list1 = [ - self.label_all_save_dir, - self.calib_save_dir, - self.pose_save_dir, - self.timestamp_save_dir, - ] - dir_list2 = [self.label_save_dir, self.image_save_dir] - if self.save_cam_sync_labels: - dir_list1.append(self.cam_sync_label_all_save_dir) - dir_list2.append(self.cam_sync_label_save_dir) - else: - dir_list1 = [ - self.calib_save_dir, self.pose_save_dir, - self.timestamp_save_dir - ] - dir_list2 = [self.image_save_dir] - if 'testing_3d_camera_only_detection' not in self.load_dir: - dir_list1.append(self.point_cloud_save_dir) - for d in dir_list1: - mmengine.mkdir_or_exist(d) - for d in dir_list2: - for i in range(5): - mmengine.mkdir_or_exist(f'{d}{str(i)}') - def convert_range_image_to_point_cloud(self, frame, range_images, @@ -604,29 +406,317 @@ def cart_to_homo(self, mat): raise ValueError(mat.shape) return ret + def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos): + """Generate waymo train/val/test infos. 
+    def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos):
+        """Generate waymo train/val/test infos.
+
+        For more details about infos, please refer to:
+        https://mmdetection3d.readthedocs.io/en/latest/advanced_guides/datasets/waymo.html
+        """  # noqa: E501
+        frame_infos = dict()
+
+        # Gather frame infos
+        sample_idx = \
+            f'{self.prefix}{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}'
+        frame_infos['sample_idx'] = int(sample_idx)
+        frame_infos['timestamp'] = frame.timestamp_micros
+        frame_infos['ego2global'] = np.array(frame.pose.transform).reshape(
+            4, 4).astype(np.float32).tolist()
+        frame_infos['context_name'] = frame.context.name
+
+        # Gather camera infos
+        frame_infos['images'] = dict()
+        # waymo front camera to kitti reference camera
+        T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],
+                                       [1.0, 0.0, 0.0]])
+        camera_calibs = []
+        Tr_velo_to_cams = []
+        for camera in frame.context.camera_calibrations:
+            # extrinsic parameters
+            T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(
+                4, 4)
+            T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)
+            Tr_velo_to_cam = \
+                self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam
+            Tr_velo_to_cams.append(Tr_velo_to_cam)
+
+            # intrinsic parameters
+            camera_calib = np.zeros((3, 4))
+            camera_calib[0, 0] = camera.intrinsic[0]
+            camera_calib[1, 1] = camera.intrinsic[1]
+            camera_calib[0, 2] = camera.intrinsic[2]
+            camera_calib[1, 2] = camera.intrinsic[3]
+            camera_calib[2, 2] = 1
+            camera_calibs.append(camera_calib)
+
+        for i, (cam_key, camera_calib, Tr_velo_to_cam) in enumerate(
+                zip(self.camera_types, camera_calibs, Tr_velo_to_cams)):
+            cam_infos = dict()
+            cam_infos['img_path'] = str(sample_idx) + '.jpg'
+            # NOTE: frame.images is not guaranteed to follow the camera
+            # index order, so match on img.name explicitly.
+            for img in frame.images:
+                if img.name == i + 1:
+                    width, height = Image.open(BytesIO(img.image)).size
+                    cam_infos['height'] = height
+                    cam_infos['width'] = width
+            cam_infos['lidar2cam'] = Tr_velo_to_cam.astype(np.float32).tolist()
+            cam_infos['cam2img'] = camera_calib.astype(np.float32).tolist()
+            cam_infos['lidar2img'] = (camera_calib @ Tr_velo_to_cam).astype(
+                np.float32).tolist()
+            frame_infos['images'][cam_key] = cam_infos
+
+        # Gather lidar infos
+        lidar_infos = dict()
+        lidar_infos['lidar_path'] = str(sample_idx) + '.bin'
+        lidar_infos['num_pts_feats'] = 6
+        frame_infos['lidar_points'] = lidar_infos
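A sketch of the projection chain assembled above, with made-up numbers: cam2img is the 3x4 intrinsic matrix, lidar2cam the 4x4 extrinsic one, and lidar2img = cam2img @ lidar2cam maps homogeneous lidar points to pixels.

import numpy as np

cam2img = np.array([[2000.0, 0.0, 960.0, 0.0],
                    [0.0, 2000.0, 640.0, 0.0],
                    [0.0, 0.0, 1.0, 0.0]])
lidar2cam = np.eye(4)  # identity purely for illustration
lidar2img = cam2img @ lidar2cam  # shape (3, 4)

pt_lidar = np.array([2.0, 1.0, 10.0, 1.0])  # homogeneous lidar point
u, v, w = lidar2img @ pt_lidar
print(u / w, v / w)  # pixel coordinates after perspective division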
+
+        # Gather lidar sweeps and camera sweeps infos
+        # TODO: Add lidar2img in image sweeps infos when we need it.
+        # TODO: Consider merging lidar sweeps infos and image sweeps infos.
+        lidar_sweeps_infos, image_sweeps_infos = [], []
+        for prev_offset in range(-1, -self.max_sweeps - 1, -1):
+            prev_lidar_infos = dict()
+            prev_image_infos = dict()
+            if frame_idx + prev_offset >= 0:
+                prev_frame_infos = file_infos[prev_offset]
+                prev_lidar_infos['timestamp'] = prev_frame_infos['timestamp']
+                prev_lidar_infos['ego2global'] = prev_frame_infos['ego2global']
+                prev_lidar_infos['lidar_points'] = dict()
+                lidar_path = prev_frame_infos['lidar_points']['lidar_path']
+                prev_lidar_infos['lidar_points']['lidar_path'] = lidar_path
+                lidar_sweeps_infos.append(prev_lidar_infos)
+
+                prev_image_infos['timestamp'] = prev_frame_infos['timestamp']
+                prev_image_infos['ego2global'] = prev_frame_infos['ego2global']
+                prev_image_infos['images'] = dict()
+                for cam_key in self.camera_types:
+                    prev_image_infos['images'][cam_key] = dict()
+                    img_path = prev_frame_infos['images'][cam_key]['img_path']
+                    prev_image_infos['images'][cam_key]['img_path'] = img_path
+                image_sweeps_infos.append(prev_image_infos)
+        if lidar_sweeps_infos:
+            frame_infos['lidar_sweeps'] = lidar_sweeps_infos
+        if image_sweeps_infos:
+            frame_infos['image_sweeps'] = image_sweeps_infos
+
+        if not self.test_mode:
+            # Gather instances infos which are used for lidar-based
+            # 3D detection
+            frame_infos['instances'] = self.gather_instance_info(frame)
+            # Gather cam_sync_instances infos which are used for image-based
+            # (multi-view) 3D detection.
+            if self.save_cam_sync_instances:
+                frame_infos['cam_sync_instances'] = self.gather_instance_info(
+                    frame, cam_sync=True)
+            # Gather cam_instances infos which are used for image-based
+            # (monocular) 3D detection (optional).
+            # TODO: Should we use cam_sync_instances to generate
+            # cam_instances?
+            if self.save_cam_instances:
+                frame_infos['cam_instances'] = self.gather_cam_instance_info(
+                    copy.deepcopy(frame_infos['instances']),
+                    frame_infos['images'])
+        file_infos.append(frame_infos)
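Roughly the shape of one entry appended to file_infos above; every value here is a made-up placeholder, only the keys follow the converter code:

frame_infos = dict(
    sample_idx=1000042,  # int('<prefix><file_idx:03d><frame_idx:03d>')
    timestamp=1557855897452385,
    context_name='segment-xxxx',  # frame.context.name (placeholder)
    ego2global=[[1.0, 0.0, 0.0, 0.0],
                [0.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 1.0]],  # 4x4 row-major pose
    images=dict(
        CAM_FRONT=dict(img_path='1000042.jpg', height=1280, width=1920)),
    lidar_points=dict(lidar_path='1000042.bin', num_pts_feats=6),
    instances=[],           # filled by gather_instance_info(frame)
    cam_sync_instances=[],  # filled with cam_sync=True
)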
+
+    def gather_instance_info(self, frame, cam_sync=False):
+        """Generate instances and cam_sync_instances infos.
+
+        For more details about infos, please refer to:
+        https://mmdetection3d.readthedocs.io/en/latest/advanced_guides/datasets/waymo.html
+        """  # noqa: E501
+        id_to_bbox = dict()
+        id_to_name = dict()
+        for labels in frame.projected_lidar_labels:
+            name = labels.name
+            for label in labels.labels:
+                # TODO: need a workaround as bbox may not belong to front cam
+                bbox = [
+                    label.box.center_x - label.box.length / 2,
+                    label.box.center_y - label.box.width / 2,
+                    label.box.center_x + label.box.length / 2,
+                    label.box.center_y + label.box.width / 2
+                ]
+                id_to_bbox[label.id] = bbox
+                id_to_name[label.id] = name - 1
+
+        group_id = 0
+        instance_infos = []
+        for obj in frame.laser_labels:
+            instance_info = dict()
+            bounding_box = None
+            name = None
+            id = obj.id
+            for proj_cam in self.cam_list:
+                if id + proj_cam in id_to_bbox:
+                    bounding_box = id_to_bbox.get(id + proj_cam)
+                    name = id_to_name.get(id + proj_cam)
+                    break
+
+            # NOTE: the 2D labels do not have strict correspondence with
+            # the projected 2D lidar labels
+            # e.g.: the projected 2D labels can be in camera 2
+            # while the most_visible_camera can have id 4
+            if cam_sync:
+                if obj.most_visible_camera_name:
+                    name = self.cam_list.index(
+                        f'_{obj.most_visible_camera_name}')
+                    box3d = obj.camera_synced_box
+                else:
+                    continue
+            else:
+                box3d = obj.box
+
+            if bounding_box is None or name is None:
+                name = 0
+                bounding_box = [0.0, 0.0, 0.0, 0.0]
+
+            my_type = self.type_list[obj.type]
+
+            if my_type not in self.selected_waymo_classes:
+                continue
+            else:
+                label = self.selected_waymo_classes.index(my_type)
+
+            if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1:
+                continue
+
+            group_id += 1
+            instance_info['group_id'] = group_id
+            instance_info['camera_id'] = name
+            instance_info['bbox'] = bounding_box
+            instance_info['bbox_label'] = label
+
+            height = box3d.height
+            width = box3d.width
+            length = box3d.length
+
+            # NOTE: We save the bottom center of 3D bboxes.
+            x = box3d.center_x
+            y = box3d.center_y
+            z = box3d.center_z - height / 2
+
+            rotation_y = box3d.heading
+
+            instance_info['bbox_3d'] = np.array(
+                [x, y, z, length, width, height,
+                 rotation_y]).astype(np.float32).tolist()
+            instance_info['bbox_label_3d'] = label
+            instance_info['num_lidar_pts'] = obj.num_lidar_points_in_box
+
+            if self.save_track_id:
+                instance_info['track_id'] = obj.id
+            instance_infos.append(instance_info)
+        return instance_infos
+
+    def gather_cam_instance_info(self, instances: dict, images: dict):
+        """Generate cam_instances infos.
+
+        For more details about infos, please refer to:
+        https://mmdetection3d.readthedocs.io/en/latest/advanced_guides/datasets/waymo.html
+        """  # noqa: E501
+        cam_instances = dict()
+        for cam_type in self.camera_types:
+            lidar2cam = np.array(images[cam_type]['lidar2cam'])
+            cam2img = np.array(images[cam_type]['cam2img'])
+            cam_instances[cam_type] = []
+            for instance in instances:
+                cam_instance = dict()
+                gt_bboxes_3d = np.array(instance['bbox_3d'])
+                # Convert lidar coordinates to camera coordinates
+                gt_bboxes_3d = LiDARInstance3DBoxes(
+                    gt_bboxes_3d[None, :]).convert_to(
+                        Box3DMode.CAM, lidar2cam, correct_yaw=True)
+                corners_3d = gt_bboxes_3d.corners.numpy()
+                corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
+                in_camera = np.argwhere(corners_3d[2, :] > 0).flatten()
+                corners_3d = corners_3d[:, in_camera]
+                # Project 3D box to 2D.
+                corner_coords = view_points(corners_3d, cam2img,
+                                            True).T[:, :2].tolist()
+
+                # Keep only corners that fall within the image.
+                # TODO: imsize should be determined by the current image size
+                # CAM_FRONT: (1920, 1280)
+                # CAM_FRONT_LEFT: (1920, 1280)
+                # CAM_SIDE_LEFT: (1920, 886)
+                final_coords = post_process_coords(
+                    corner_coords,
+                    imsize=(images['CAM_FRONT']['width'],
+                            images['CAM_FRONT']['height']))
+
+                # Skip if the convex hull of the re-projected corners
+                # does not intersect the image canvas.
+                if final_coords is None:
+                    continue
+                else:
+                    min_x, min_y, max_x, max_y = final_coords
+
+                cam_instance['bbox'] = [min_x, min_y, max_x, max_y]
+                cam_instance['bbox_label'] = instance['bbox_label']
+                cam_instance['bbox_3d'] = gt_bboxes_3d.numpy().squeeze(
+                ).astype(np.float32).tolist()
+                cam_instance['bbox_label_3d'] = instance['bbox_label_3d']
+
+                center_3d = gt_bboxes_3d.gravity_center.numpy()
+                center_2d_with_depth = points_cam2img(
+                    center_3d, cam2img, with_depth=True)
+                center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
+
+                # center_2d (in pixels) together with depth; samples with
+                # depth <= 0 are removed
+                if center_2d_with_depth[2] <= 0:
+                    continue
+                cam_instance['center_2d'] = center_2d_with_depth[:2]
+                cam_instance['depth'] = center_2d_with_depth[2]
+
+                # TODO: Discuss whether following info is necessary
+                cam_instance['bbox_3d_isvalid'] = True
+                cam_instance['velocity'] = -1
+                cam_instances[cam_type].append(cam_instance)
+
+        return cam_instances
+
+    def merge_trainval_infos(self):
+        """Merge training and validation infos into a single file."""
+        train_infos_path = osp.join(
+            osp.dirname(self.save_dir), f'{self.info_prefix}_infos_train.pkl')
+        val_infos_path = osp.join(
+            osp.dirname(self.save_dir), f'{self.info_prefix}_infos_val.pkl')
+        train_infos = mmengine.load(train_infos_path)
+        val_infos = mmengine.load(val_infos_path)
+        trainval_infos = dict(
+            metainfo=train_infos['metainfo'],
+            data_list=train_infos['data_list'] + val_infos['data_list'])
+        mmengine.dump(
+            trainval_infos,
+            osp.join(
+                osp.dirname(self.save_dir),
+                f'{self.info_prefix}_infos_trainval.pkl'))
+
 
 def create_ImageSets_img_ids(root_dir, splits):
+    """Create txt files indicating what to collect in each split."""
     save_dir = join(root_dir, 'ImageSets/')
     if not exists(save_dir):
         os.mkdir(save_dir)
 
-    idx_all = [[] for i in splits]
+    idx_all = [[] for _ in splits]
     for i, split in enumerate(splits):
-        path = join(root_dir, splits[i], 'calib')
+        path = join(root_dir, split, 'image_0')
         if not exists(path):
             RawNames = []
         else:
             RawNames = os.listdir(path)
 
         for name in RawNames:
-            if name.endswith('.txt'):
-                idx = name.replace('.txt', '\n')
+            if name.endswith('.jpg'):
+                idx = name.replace('.jpg', '\n')
                 idx_all[int(idx[0])].append(idx)
         idx_all[i].sort()
 
     open(save_dir + 'train.txt', 'w').writelines(idx_all[0])
     open(save_dir + 'val.txt', 'w').writelines(idx_all[1])
     open(save_dir + 'trainval.txt', 'w').writelines(idx_all[0] + idx_all[1])
-    open(save_dir + 'test.txt', 'w').writelines(idx_all[2])
-    # open(save_dir+'test_cam_only.txt','w').writelines(idx_all[3])
+    if len(idx_all) >= 3:
+        open(save_dir + 'test.txt', 'w').writelines(idx_all[2])
+    if len(idx_all) >= 4:
+        open(save_dir + 'test_cam_only.txt', 'w').writelines(idx_all[3])
     print('created txt files indicating what to collect in ', splits)
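A quick sanity check of the info files produced by this pipeline; the path assumes the default kitti_format layout and is illustrative only:

import mmengine

infos = mmengine.load('./data/waymo/kitti_format/waymo_infos_val.pkl')
print(infos['metainfo'])  # {'dataset': 'waymo', 'version': '1.4', 'info_version': '1.1'}
frame = infos['data_list'][0]
print(frame['sample_idx'], frame['context_name'], frame['timestamp'])
print(frame['lidar_points']['lidar_path'])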