diff --git a/armory/datasets/adversarial/carla_video_tracking_dev/__init__.py b/armory/datasets/adversarial/carla_video_tracking_dev/__init__.py
new file mode 100644
index 000000000..fb635dafb
--- /dev/null
+++ b/armory/datasets/adversarial/carla_video_tracking_dev/__init__.py
@@ -0,0 +1,3 @@
+"""carla_video_tracking_dev dataset."""
+
+from .carla_video_tracking_dev import CarlaVideoTrackingDev
diff --git a/armory/datasets/adversarial/carla_video_tracking_dev/carla_video_tracking_dev.py b/armory/datasets/adversarial/carla_video_tracking_dev/carla_video_tracking_dev.py
new file mode 100644
index 000000000..2a4005446
--- /dev/null
+++ b/armory/datasets/adversarial/carla_video_tracking_dev/carla_video_tracking_dev.py
@@ -0,0 +1,124 @@
+"""carla_video_tracking_dev dataset."""
+
+import os
+import glob
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+_DESCRIPTION = """
+Synthetic single modality dataset generated using CARLA (https://carla.org).
+"""
+
+_CITATION = """
+@inproceedings{Dosovitskiy17,
+  title = { {CARLA}: {An} Open Urban Driving Simulator},
+  author = {Alexey Dosovitskiy and German Ros and Felipe Codevilla and Antonio Lopez and Vladlen Koltun},
+  booktitle = {Proceedings of the 1st Annual Conference on Robot Learning},
+  pages = {1--16},
+  year = {2017}
+}
+"""
+
+_URL = "https://armory-public-data.s3.us-east-2.amazonaws.com/carla/carla_video_tracking_dev_2.0.0.tar.gz"
+
+
+class CarlaVideoTrackingDev(tfds.core.GeneratorBasedBuilder):
+    """DatasetBuilder for carla_video_tracking_dev dataset."""
+
+    VERSION = tfds.core.Version("2.0.0")
+    RELEASE_NOTES = {
+        "1.0.0": "Initial release.",
+        "2.0.0": "Eval 5 CARLA single object tracking data with higher resolution, HD texture, higher frame rate, multiple non-tracked objects, and camera motion",
+    }
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+
+        features = tfds.features.FeaturesDict(
+            {
+                "video": tfds.features.Video(
+                    (None, 960, 1280, 3),
+                    encoding_format="png",
+                ),
+                "bboxes": tfds.features.Sequence(
+                    tfds.features.Tensor(
+                        shape=[4], dtype=tf.int64
+                    ),  # ground truth unnormalized object bounding boxes given as [x1,y1,x2,y2]
+                ),
+                # these data only apply to the "green screen patch" objects
+                "patch_metadata": tfds.features.FeaturesDict(
+                    {
+                        "gs_coords": tfds.features.Tensor(
+                            shape=[None, 4, 2], dtype=tf.int64
+                        ),
+                        "masks": tfds.features.Tensor(
+                            shape=[None, 960, 1280, 3], dtype=tf.uint8
+                        ),
+                    }
+                ),
+            }
+        )
+
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=features,
+            citation=_CITATION,
+            metadata=tfds.core.MetadataDict(
+                {
+                    "frame_rate": 10,
+                }
+            ),
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        """Returns SplitGenerators."""
+        path = dl_manager.download_and_extract(_URL)
+
+        return {"dev": self._generate_examples(path / "dev")}
+
+    def _generate_examples(self, path):
+        """Yields examples."""
+
+        videos = os.listdir(path)
+        videos.sort()
+        print("videos: {}".format(videos))
+
+        for vi, video in enumerate(videos):
+            # Get all frames in a video
+            all_frames = glob.glob(
+                os.path.join(path, video, "*.png")
+            )  # all images including RGB and foreground mask
+            mask_frames = glob.glob(
+                os.path.join(path, video, "*_mask.png")
+            )  # all foreground masks
+            rgb_frames = list(set(all_frames) - set(mask_frames))  # all rgb frames
+
+            # sort alphabetically
+            rgb_frames.sort()
+            mask_frames.sort()
+
+            # verify pairing of RGB and mask
+            for r, m in zip(rgb_frames, mask_frames):
+                assert r.split(".")[-2] in m
+
+            # get binarized patch masks
+            masks = []
+            for mf in mask_frames:
+                mask = Image.open(os.path.join(path, video, mf)).convert("RGB")
+                mask = np.array(mask, dtype=np.uint8)
+                mask[np.all(mask == [255, 255, 255], axis=-1)] = 1
+                masks.append(mask)
+
+            example = {
+                "video": rgb_frames,
+                "bboxes": np.load(os.path.join(path, video, "gt_boxes.npy")),
+                "patch_metadata": {
+                    "gs_coords": np.load(os.path.join(path, video, "gs_coords.npy")),
+                    "masks": masks,
+                },
+            }
+
+            yield vi, example
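
For orientation, here is a minimal usage sketch for the builder above (not part of the patch): it assumes `tensorflow_datasets` and armory are installed and this module is importable; the `take(1)` inspection and the printed shapes are illustrative only.

```python
# Hypothetical usage sketch for CarlaVideoTrackingDev; assumes armory and
# tensorflow_datasets are importable. Not part of the patch itself.
from armory.datasets.adversarial.carla_video_tracking_dev import CarlaVideoTrackingDev

builder = CarlaVideoTrackingDev()
builder.download_and_prepare()  # fetches and verifies the tarball via checksums.tsv
ds = builder.as_dataset(split="dev")

for example in ds.take(1):
    print(example["video"].shape)                    # (num_frames, 960, 1280, 3), uint8
    print(example["bboxes"].shape)                   # (num_frames, 4), int64 [x1,y1,x2,y2]
    print(example["patch_metadata"]["masks"].shape)  # (num_frames, 960, 1280, 3), uint8
```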
diff --git a/armory/datasets/adversarial/carla_video_tracking_dev/checksums.tsv b/armory/datasets/adversarial/carla_video_tracking_dev/checksums.tsv
new file mode 100644
index 000000000..585b6fe06
--- /dev/null
+++ b/armory/datasets/adversarial/carla_video_tracking_dev/checksums.tsv
@@ -0,0 +1 @@
+https://armory-public-data.s3.us-east-2.amazonaws.com/carla/carla_video_tracking_dev_2.0.0.tar.gz 1278862237 8b23ca76bd9602a8e3ff4058335b7fb8ca665660a8a958852715e9a26ffbef20 carla_video_tracking_dev_2.0.0.tar.gz
diff --git a/armory/datasets/adversarial/carla_video_tracking_test/__init__.py b/armory/datasets/adversarial/carla_video_tracking_test/__init__.py
new file mode 100644
index 000000000..4b0b22011
--- /dev/null
+++ b/armory/datasets/adversarial/carla_video_tracking_test/__init__.py
@@ -0,0 +1,3 @@
+"""carla_video_tracking_test dataset."""
+
+from .carla_video_tracking_test import CarlaVideoTrackingTest
diff --git a/armory/datasets/adversarial/carla_video_tracking_test/carla_video_tracking_test.py b/armory/datasets/adversarial/carla_video_tracking_test/carla_video_tracking_test.py
new file mode 100644
index 000000000..b19d6d29d
--- /dev/null
+++ b/armory/datasets/adversarial/carla_video_tracking_test/carla_video_tracking_test.py
@@ -0,0 +1,119 @@
+"""carla_video_tracking_test dataset."""
+
+import os
+import glob
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+
+_DESCRIPTION = """
+Synthetic single modality dataset generated using CARLA (https://carla.org).
+""" + +_CITATION = """ +@inproceedings{Dosovitskiy17, + title = { {CARLA}: {An} Open Urban Driving Simulator}, + author = {Alexey Dosovitskiy and German Ros and Felipe Codevilla and Antonio Lopez and Vladlen Koltun}, + booktitle = {Proceedings of the 1st Annual Conference on Robot Learning}, + pages = {1--16}, + year = {2017} +} +""" + +_URL = "https://armory-public-data.s3.us-east-2.amazonaws.com/carla/carla_video_tracking_test_2.0.0.tar.gz" + + +class CarlaVideoTrackingTest(tfds.core.GeneratorBasedBuilder): + """DatasetBuilder for carla_video_tracking_test dataset.""" + + VERSION = tfds.core.Version("2.0.0") + RELEASE_NOTES = { + "1.0.0": "Initial release.", + "2.0.0": "Eval 5 CARLA single object tracking data with higher resolution, HD texture, higher frame rate, multiple non-tracked objects, and camera motion", + } + + def _info(self) -> tfds.core.DatasetInfo: + """Returns the dataset metadata.""" + + features = tfds.features.FeaturesDict( + { + "video": tfds.features.Video( + (None, 960, 1280, 3), + encoding_format="png", + ), + "bboxes": tfds.features.Sequence( + tfds.features.Tensor( + shape=[4], dtype=tf.int64 + ), # ground truth unormalized object bounding boxes given as [x1,y1,x2,y2] + ), + # these data only apply to the "green screen patch" objects + "patch_metadata": tfds.features.FeaturesDict( + { + "gs_coords": tfds.features.Tensor( + shape=[None, 4, 2], dtype=tf.int64 + ), + "masks": tfds.features.Tensor( + shape=[None, 960, 1280, 3], dtype=tf.uint8 + ), + } + ), + } + ) + + return tfds.core.DatasetInfo( + builder=self, + description=_DESCRIPTION, + features=features, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager: tfds.download.DownloadManager): + """Returns SplitGenerators.""" + path = dl_manager.download_and_extract(_URL) + return {"test": self._generate_examples(path / "test")} + + def _generate_examples(self, path): + """Yields examples.""" + + videos = os.listdir(path) + videos.sort() + print("videos: {}".format(videos)) + + for vi, video in enumerate(videos): + # Get all frames in a video + all_frames = glob.glob( + os.path.join(path, video, "*.png") + ) # all images including RGB and foreground mask + mask_frames = glob.glob( + os.path.join(path, video, "*_mask.png") + ) # all foreground masks + rgb_frames = list(set(all_frames) - set(mask_frames)) # all rgb frames + + # sort alphabetically + rgb_frames.sort() + mask_frames.sort() + + # verify pairing of RGB and mask + for r, m in zip(rgb_frames, mask_frames): + assert r.split(".")[-2] in m + + # get binarized patch masks + masks = [] + for mf in mask_frames: + mask = Image.open(os.path.join(path, video, mf)).convert("RGB") + mask = np.array(mask, dtype=np.uint8) + mask[np.all(mask == [255, 255, 255], axis=-1)] = 1 + masks.append(mask) + + example = { + "video": rgb_frames, + "bboxes": np.load(os.path.join(path, video, "gt_boxes.npy")), + "patch_metadata": { + "gs_coords": np.load(os.path.join(path, video, "gs_coords.npy")), + "masks": masks, + }, + } + + yield vi, example diff --git a/armory/datasets/adversarial/carla_video_tracking_test/checksums.tsv b/armory/datasets/adversarial/carla_video_tracking_test/checksums.tsv new file mode 100644 index 000000000..de1b9276d --- /dev/null +++ b/armory/datasets/adversarial/carla_video_tracking_test/checksums.tsv @@ -0,0 +1 @@ +https://armory-public-data.s3.us-east-2.amazonaws.com/carla/carla_video_tracking_test_2.0.0.tar.gz 387465525 6bd09f5cf50c0e16f34b5054e9d77f95cb4491a373ecb842431cc58ae50b882e carla_video_tracking_test_2.0.0.tar.gz diff --git 
diff --git a/armory/datasets/cached_datasets.json b/armory/datasets/cached_datasets.json
index ada513788..51eb26fca 100644
--- a/armory/datasets/cached_datasets.json
+++ b/armory/datasets/cached_datasets.json
@@ -27,6 +27,20 @@
         "url": null,
         "version": "1.0.0"
     },
+    "carla_video_tracking_dev": {
+        "sha256": "958d470dcd394928050f4123a7af05b0e389ceeec6fa0a3261df55a65e553b69",
+        "size": 1281628036,
+        "subdir": "carla_video_tracking_dev/2.0.0",
+        "url": null,
+        "version": "2.0.0"
+    },
+    "carla_video_tracking_test": {
+        "sha256": "8c52281611807243cba425ad3a588f4abca40dfb2b3ab828b9ad8a5191a7df10",
+        "size": 388218968,
+        "subdir": "carla_video_tracking_test/2.0.0",
+        "url": null,
+        "version": "2.0.0"
+    },
     "carla_over_obj_det_train": {
         "sha256": "1fa2626726df4de8c0878f97ededdde90d4c55a4a845c8cee8db26a9797a2c6f",
         "size": 14396331191,
diff --git a/armory/datasets/generator.py b/armory/datasets/generator.py
index 1224b35b0..9fbd4018f 100644
--- a/armory/datasets/generator.py
+++ b/armory/datasets/generator.py
@@ -141,6 +141,7 @@ def __init__(
             shuffle_elements=shuffle_elements,
             element_filter=element_filter,
             element_map=element_map,
+            metadata=info.metadata if info.metadata else tfds.core.MetadataDict(),
         )
 
     def _set_params(self, **kwargs):
diff --git a/armory/datasets/preprocessing.py b/armory/datasets/preprocessing.py
index 4ac325136..a47925924 100644
--- a/armory/datasets/preprocessing.py
+++ b/armory/datasets/preprocessing.py
@@ -98,6 +98,47 @@ def carla_over_obj_det_dev(element, modality="rgb"):
     )
 
 
+def carla_video_tracking_preprocess(x, max_frames=None):
+    # Clip
+    if max_frames:
+        max_frames = int(max_frames)
+        if max_frames <= 0:
+            raise ValueError(f"max_frames {max_frames} must be > 0")
+        x = x[:max_frames, :]
+    x = tf.cast(x, tf.float32) / 255.0
+    return x
+
+
+def carla_video_tracking_preprocess_labels(y, y_patch_metadata, max_frames=None):
+    # Clip
+    if max_frames:
+        max_frames = int(max_frames)
+        if max_frames <= 0:
+            raise ValueError(f"max_frames {max_frames} must be > 0")
+        y = y[:max_frames, :]
+        y_patch_metadata = {k: v[:max_frames, :] for (k, v) in y_patch_metadata.items()}
+    # Update labels
+    y = {"boxes": y}
+    y_patch_metadata = {
+        k: (tf.squeeze(v, axis=0) if v.shape[0] == 1 else v)
+        for k, v in y_patch_metadata.items()
+    }
+    return y, y_patch_metadata
+
+
+def carla_video_tracking(element, max_frames=None):
+    return carla_video_tracking_preprocess(
+        element["video"],
+        max_frames=max_frames,
+    ), carla_video_tracking_preprocess_labels(
+        element["bboxes"], element["patch_metadata"], max_frames=max_frames
+    )
+
+
+carla_video_tracking_dev = register(carla_video_tracking, "carla_video_tracking_dev")
+carla_video_tracking_test = register(carla_video_tracking, "carla_video_tracking_test")
+
+
 @register
 def carla_over_obj_det_train(element, modality="rgb"):
     return carla_multimodal_obj_det(
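
To make the registered preprocessor's contract concrete, here is a hedged sketch (dummy tensors only, shapes taken from the builders above; assumes armory is importable) of what `carla_video_tracking` returns for one element:

```python
# Sketch of the carla_video_tracking preprocessor contract, using dummy
# tensors shaped like the builders' features. Shapes/values are illustrative.
import tensorflow as tf

from armory.datasets.preprocessing import carla_video_tracking

element = {
    "video": tf.zeros([20, 960, 1280, 3], dtype=tf.uint8),
    "bboxes": tf.zeros([20, 4], dtype=tf.int64),
    "patch_metadata": {
        "gs_coords": tf.zeros([20, 4, 2], dtype=tf.int64),
        "masks": tf.zeros([20, 960, 1280, 3], dtype=tf.uint8),
    },
}

x, (y, y_patch_metadata) = carla_video_tracking(element, max_frames=10)
print(x.shape, x.dtype)  # (10, 960, 1280, 3) float32, scaled to [0, 1]
print(y["boxes"].shape)  # (10, 4) -- boxes re-keyed into a dict and clipped
```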
{basename}") diff --git a/armory/metrics/task.py b/armory/metrics/task.py index af68cf958..7d893c2ca 100644 --- a/armory/metrics/task.py +++ b/armory/metrics/task.py @@ -522,7 +522,7 @@ def _check_video_tracking_input(y, y_pred): y_pred (List[Dict, ...]): same as above """ for input in [y, y_pred]: - assert isinstance(input, list) + assert isinstance(input, list), f"Expected List[Dict, ...] got {type(input)}" for input_dict_i in input: assert isinstance(input_dict_i, dict) assert "boxes" in input_dict_i diff --git a/armory/scenarios/carla_video_tracking.py b/armory/scenarios/carla_video_tracking.py index 8a30e5a79..b6b918193 100644 --- a/armory/scenarios/carla_video_tracking.py +++ b/armory/scenarios/carla_video_tracking.py @@ -10,6 +10,8 @@ from armory.instrument.export import ExportMeter, VideoTrackingExporter from armory.scenarios.scenario import Scenario +DEFAULT_FRAME_RATE = 10 + class CarlaVideoTracking(Scenario): def __init__(self, *args, **kwargs): @@ -24,9 +26,23 @@ def load_test_dataset(self): raise ValueError("batch_size must be 1 for evaluation.") super().load_test_dataset(test_split_default="dev") + def _split_batches_into_list(self, x: dict) -> list: + """Fix for tfdsv4 upgrade - separate batches into list""" + if not isinstance(x, dict): + return x + expected_batches = list(x.values())[0].shape[0] + if not all( + isinstance(x[k], np.ndarray) and x[k].shape[0] == expected_batches + for k in x + ): + raise ValueError( + f"Expected all values to have the same length (batches), but got {x}" + ) + return [dict((k, v[i]) for k, v in x.items()) for i in range(expected_batches)] + def next(self): super().next() - self.y, self.y_patch_metadata = self.y + self.y, self.y_patch_metadata = map(self._split_batches_into_list, self.y) self.probe.update(y=self.y, y_patch_metadata=self.y_patch_metadata) def run_benign(self): @@ -57,7 +73,7 @@ def run_attack(self): x_adv = self.attack.generate( x=x, y=y_target, - y_patch_metadata=[self.y_patch_metadata], + y_patch_metadata=self.y_patch_metadata, **self.generate_kwargs, ) @@ -76,7 +92,7 @@ def run_attack(self): def _load_sample_exporter(self): return VideoTrackingExporter( self.export_dir, - frame_rate=self.test_dataset.context.frame_rate, + frame_rate=self.test_dataset.metadata.get("frame_rate", DEFAULT_FRAME_RATE), ) def load_export_meters(self): @@ -86,7 +102,7 @@ def load_export_meters(self): # Add export meters that export examples with boxes overlaid self.sample_exporter_with_boxes = VideoTrackingExporter( self.export_dir, - frame_rate=self.test_dataset.context.frame_rate, + frame_rate=self.test_dataset.metadata.get("frame_rate", DEFAULT_FRAME_RATE), default_export_kwargs={"with_boxes": True}, ) for probe_data, probe_pred in [("x", "y_pred"), ("x_adv", "y_pred_adv")]: diff --git a/armory/scenarios/video_ucf101_scenario.py b/armory/scenarios/video_ucf101_scenario.py index 673f6b50e..b00bf5e48 100644 --- a/armory/scenarios/video_ucf101_scenario.py +++ b/armory/scenarios/video_ucf101_scenario.py @@ -7,6 +7,8 @@ from armory.instrument.export import VideoClassificationExporter from armory.scenarios.scenario import Scenario +DEFAULT_FRAME_RATE = 10 + class Ucf101(Scenario): def load_test_dataset(self): @@ -20,5 +22,5 @@ def load_test_dataset(self): def _load_sample_exporter(self): return VideoClassificationExporter( self.export_dir, - frame_rate=self.test_dataset.context.frame_rate, + frame_rate=self.test_dataset.metadata.get("frame_rate", DEFAULT_FRAME_RATE), ) diff --git a/armory/utils/config_loading.py b/armory/utils/config_loading.py index 
diff --git a/armory/utils/config_loading.py b/armory/utils/config_loading.py
index eb0101923..2e08017cb 100644
--- a/armory/utils/config_loading.py
+++ b/armory/utils/config_loading.py
@@ -30,7 +30,7 @@
 from armory.art_experimental.attacks import patch
 from armory.art_experimental.attacks.sweep import SweepAttack
 
-from armory.data.datasets import ArmoryDataGenerator, EvalGenerator
+from armory.datasets.generator import ArmoryDataGenerator
 from armory.data.utils import maybe_download_weights_from_s3
 from armory.utils import labels
diff --git a/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_defended.json b/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_defended.json
index c4f56479f..f3a8b3e0c 100755
--- a/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_defended.json
+++ b/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_defended.json
@@ -14,11 +14,11 @@
         "use_label": true
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "dev",
-        "framework": "numpy",
-        "module": "armory.data.adversarial_datasets",
-        "name": "carla_video_tracking_dev"
+        "test": {
+            "batch_size": 1,
+            "split": "dev",
+            "name": "carla_video_tracking_dev"
+        }
     },
     "defense": {
         "kwargs": {
diff --git a/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_undefended.json b/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_undefended.json
index 3dd6dc8a0..a6906c4eb 100755
--- a/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_undefended.json
+++ b/scenario_configs/eval5/carla_video_tracking/carla_video_tracking_goturn_advtextures_undefended.json
@@ -14,11 +14,11 @@
         "use_label": true
     },
     "dataset": {
-        "batch_size": 1,
-        "eval_split": "dev",
-        "framework": "numpy",
-        "module": "armory.data.adversarial_datasets",
-        "name": "carla_video_tracking_dev"
+        "test": {
+            "batch_size": 1,
+            "split": "dev",
+            "name": "carla_video_tracking_dev"
+        }
     },
    "defense": null,
     "metric": {
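
For reference, the reshaped `dataset` block nests loader arguments under a per-split key. A hypothetical reading of it might look like the sketch below; the helper is illustrative only and not armory API.

```python
# Hypothetical sketch of consuming the new nested dataset config block;
# this helper is not armory's API, just an illustration of the mapping.
test_cfg = {
    "batch_size": 1,  # the scenario enforces batch_size == 1 for evaluation
    "split": "dev",
    "name": "carla_video_tracking_dev",
}

def dataset_kwargs(cfg: dict) -> dict:
    # Pass the per-split config keys straight through as loader kwargs.
    return {"name": cfg["name"], "split": cfg["split"], "batch_size": cfg["batch_size"]}

print(dataset_kwargs(test_cfg))
```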