From 87564f784314ee42f264d991bd9e174d402577e6 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 10 Feb 2025 10:47:14 +0100 Subject: [PATCH] Checklist for LeRobot dataloader (#8949) It's a bit of an odd one because of the way the dataloader works, but I would still feel much better with an odd test than no tests at all. I've uploaded a heavily sampled down `apple_storage` dataset to LFS in order to make this work. * DNM: requires #8936 --- scripts/lint.py | 1 + .../lerobot/apple_storage/.gitattributes | 59 ++++ tests/assets/lerobot/apple_storage/README.md | 125 +++++++ .../data/chunk-000/episode_000000.parquet | 3 + .../data/chunk-000/episode_000001.parquet | 3 + .../data/chunk-000/episode_000002.parquet | 3 + .../lerobot/apple_storage/meta/episodes.jsonl | 3 + .../lerobot/apple_storage/meta/info.json | 90 +++++ .../lerobot/apple_storage/meta/stats.json | 314 ++++++++++++++++++ .../lerobot/apple_storage/meta/tasks.jsonl | 1 + .../observation.image/episode_000000.mp4 | 3 + .../observation.image/episode_000001.mp4 | 3 + .../observation.image/episode_000002.mp4 | 3 + .../check_lerobot_dataloader.py | 44 +++ 14 files changed, 655 insertions(+) create mode 100644 tests/assets/lerobot/apple_storage/.gitattributes create mode 100644 tests/assets/lerobot/apple_storage/README.md create mode 100644 tests/assets/lerobot/apple_storage/data/chunk-000/episode_000000.parquet create mode 100644 tests/assets/lerobot/apple_storage/data/chunk-000/episode_000001.parquet create mode 100644 tests/assets/lerobot/apple_storage/data/chunk-000/episode_000002.parquet create mode 100644 tests/assets/lerobot/apple_storage/meta/episodes.jsonl create mode 100644 tests/assets/lerobot/apple_storage/meta/info.json create mode 100644 tests/assets/lerobot/apple_storage/meta/stats.json create mode 100644 tests/assets/lerobot/apple_storage/meta/tasks.jsonl create mode 100644 tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000000.mp4 create mode 100644 
tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000001.mp4 create mode 100644 tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000002.mp4 create mode 100644 tests/python/release_checklist/check_lerobot_dataloader.py diff --git a/scripts/lint.py b/scripts/lint.py index f9ba2ed823ea..99c3f2b24c00 100755 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -1251,6 +1251,7 @@ def main() -> None: "./run_wasm/README.md", # Has a "2d" lowercase example in a code snippet "./scripts/lint.py", # we contain all the patterns we are linting against "./scripts/zombie_todos.py", + "./tests/assets/lerobot/apple_storage/README.md", # not ours "./tests/python/gil_stress/main.py", "./tests/python/release_checklist/main.py", "./web_viewer/re_viewer.js", # auto-generated by wasm_bindgen diff --git a/tests/assets/lerobot/apple_storage/.gitattributes b/tests/assets/lerobot/apple_storage/.gitattributes new file mode 100644 index 000000000000..1ef325f1b111 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/.gitattributes @@ -0,0 +1,59 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mds filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs 
merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text +*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text +# Video files - compressed +*.mp4 filter=lfs diff=lfs merge=lfs -text +*.webm filter=lfs diff=lfs merge=lfs -text diff --git a/tests/assets/lerobot/apple_storage/README.md b/tests/assets/lerobot/apple_storage/README.md new file mode 100644 index 000000000000..8414475b6dfc --- /dev/null +++ b/tests/assets/lerobot/apple_storage/README.md @@ -0,0 +1,125 @@ +--- +license: apache-2.0 +task_categories: +- robotics +tags: +- LeRobot +configs: +- config_name: default + data_files: data/*/*.parquet 
+--- + +This dataset was created using [LeRobot](https://github.com/huggingface/lerobot). + +## Dataset Description + + + +- **Homepage:** [More Information Needed] +- **Paper:** [More Information Needed] +- **License:** apache-2.0 + +## Dataset Structure + +[meta/info.json](meta/info.json): +```json +{ + "codebase_version": "v2.0", + "robot_type": "reachy2", + "total_episodes": 50, + "total_frames": 14983, + "total_tasks": 1, + "total_videos": 50, + "total_chunks": 1, + "chunks_size": 1000, + "fps": 30, + "splits": { + "train": "0:50" + }, + "data_path": "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet", + "video_path": "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4", + "features": { + "observation.state": { + "dtype": "float32", + "shape": [ + 19 + ], + "names": null + }, + "action": { + "dtype": "float32", + "shape": [ + 19 + ], + "names": null + }, + "observation.image": { + "dtype": "video", + "shape": [ + 3, + 720, + 960 + ], + "names": [ + "channel", + "height", + "width" + ], + "info": { + "video.fps": 30.0, + "video.height": 720, + "video.width": 960, + "video.channels": 3, + "video.codec": "h264", + "video.pix_fmt": "yuv420p", + "video.is_depth_map": false, + "has_audio": false + } + }, + "timestamp": { + "dtype": "float32", + "shape": [ + 1 + ], + "names": null + }, + "frame_index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + }, + "episode_index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + }, + "index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + }, + "task_index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + } + } +} +``` + + +## Citation + +**BibTeX:** + +```bibtex +[More Information Needed] +``` \ No newline at end of file diff --git a/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000000.parquet b/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000000.parquet new file mode 100644 index 
000000000000..aeaecb6b2e45 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000000.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b5e1e8ef317d14ff6981995046adce76fd23fc8db47578690743ca162a9f79 +size 67188 diff --git a/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000001.parquet b/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000001.parquet new file mode 100644 index 000000000000..6af093da1b3b --- /dev/null +++ b/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000001.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926883df3300c625c520c643184e65a60b795bdcdcd2753f8843635beb4d0c05 +size 67605 diff --git a/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000002.parquet b/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000002.parquet new file mode 100644 index 000000000000..b7bfa60a0115 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/data/chunk-000/episode_000002.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd8ceb193183ed6909d10e718024dde6118ee6d522d40abf5cb6c410afdf422 +size 67481 diff --git a/tests/assets/lerobot/apple_storage/meta/episodes.jsonl b/tests/assets/lerobot/apple_storage/meta/episodes.jsonl new file mode 100644 index 000000000000..21ce0595d156 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/meta/episodes.jsonl @@ -0,0 +1,3 @@ +{"episode_index": 0, "tasks": ["place the apple in the plate"], "length": 299} +{"episode_index": 1, "tasks": ["place the apple in the plate"], "length": 300} +{"episode_index": 2, "tasks": ["place the apple in the plate"], "length": 300} diff --git a/tests/assets/lerobot/apple_storage/meta/info.json b/tests/assets/lerobot/apple_storage/meta/info.json new file mode 100644 index 000000000000..58171dd13196 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/meta/info.json @@ -0,0 +1,90 @@ +{ + "codebase_version": "v2.0", + "robot_type": 
"reachy2", + "total_episodes": 3, + "total_frames": 14983, + "total_tasks": 1, + "total_videos": 50, + "total_chunks": 1, + "chunks_size": 1000, + "fps": 30, + "splits": { + "train": "0:50" + }, + "data_path": "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet", + "video_path": "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4", + "features": { + "observation.state": { + "dtype": "float32", + "shape": [ + 19 + ], + "names": null + }, + "action": { + "dtype": "float32", + "shape": [ + 19 + ], + "names": null + }, + "observation.image": { + "dtype": "video", + "shape": [ + 3, + 720, + 960 + ], + "names": [ + "channel", + "height", + "width" + ], + "info": { + "video.fps": 30.0, + "video.height": 720, + "video.width": 960, + "video.channels": 3, + "video.codec": "h264", + "video.pix_fmt": "yuv420p", + "video.is_depth_map": false, + "has_audio": false + } + }, + "timestamp": { + "dtype": "float32", + "shape": [ + 1 + ], + "names": null + }, + "frame_index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + }, + "episode_index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + }, + "index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + }, + "task_index": { + "dtype": "int64", + "shape": [ + 1 + ], + "names": null + } + } +} diff --git a/tests/assets/lerobot/apple_storage/meta/stats.json b/tests/assets/lerobot/apple_storage/meta/stats.json new file mode 100644 index 000000000000..06ae3940a169 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/meta/stats.json @@ -0,0 +1,314 @@ +{ + "observation.state": { + "mean": [ + 0.2746424973011017, + 0.13668131828308105, + -0.12526904046535492, + -1.789839506149292, + 0.2267553061246872, + 0.028956126421689987, + 0.5540997982025146, + 2.268596649169922, + 0.048213887959718704, + -0.14650987088680267, + 0.21712316572666168, + -1.5627491474151611, + -0.0020320580806583166, + 0.031526923179626465, + -0.3660394847393036, + 2.083613872528076, + 
0.23164288699626923, + 0.4508134424686432, + -0.27841347455978394 + ], + "std": [ + 0.18822793662548065, + 0.02976108342409134, + 0.08715762943029404, + 0.09507708996534348, + 0.13658691942691803, + 0.23742514848709106, + 0.11697406321763992, + 0.00045329699059948325, + 0.29482150077819824, + 0.17917588353157043, + 0.2492632269859314, + 0.30189529061317444, + 0.3174964487552643, + 0.22075548768043518, + 0.20681971311569214, + 0.32018476724624634, + 0.13045811653137207, + 0.07697775214910507, + 0.12492404133081436 + ], + "max": [ + 0.6361467242240906, + 0.2218237966299057, + 0.1945919394493103, + -1.536672592163086, + 0.7294800877571106, + 0.3192594647407532, + 1.44472336769104, + 2.270291566848755, + 0.6540043950080872, + 0.16457566618919373, + 1.5770719051361084, + -0.2877470850944519, + 0.7418605089187622, + 0.673677384853363, + 0.4906846880912781, + 2.2764275074005127, + 0.4576607644557953, + 0.6002069711685181, + 0.11025071889162064 + ], + "min": [ + 0.045116838067770004, + 0.08657767623662949, + -0.26686665415763855, + -2.0993292331695557, + -0.07790473848581314, + -0.37823358178138733, + -0.09848397970199585, + 2.267223596572876, + -0.9145553112030029, + -1.1959130764007568, + -0.2340105026960373, + -2.2152175903320312, + -0.7415784597396851, + -0.6358225345611572, + -1.3001906871795654, + -0.018407821655273438, + -0.06162093207240105, + -0.0760156512260437, + -0.7120583057403564 + ] + }, + "action": { + "mean": [ + 0.27469879388809204, + 0.13673244416713715, + -0.1250300109386444, + -1.7895355224609375, + 0.22706496715545654, + 0.02881854586303234, + 0.5540575981140137, + 2.268928050994873, + 0.04698758199810982, + -0.14645721018314362, + 0.21712318062782288, + -1.5603079795837402, + -0.0034558437764644623, + 0.03151923790574074, + -0.3665141761302948, + 1.7187833786010742, + 0.23131278157234192, + 0.4507697522640228, + -0.2777464985847473 + ], + "std": [ + 0.18824636936187744, + 0.02981923520565033, + 0.08747690171003342, + 0.09504380822181702, + 
0.13675345480442047, + 0.2374909371137619, + 0.11755365133285522, + 0.0, + 0.2948968708515167, + 0.17994415760040283, + 0.25070616602897644, + 0.30324867367744446, + 0.31788700819015503, + 0.22102680802345276, + 0.20713569223880768, + 0.9815908074378967, + 0.13042518496513367, + 0.07720942795276642, + 0.12498199939727783 + ], + "max": [ + 0.6372604966163635, + 0.22220447659492493, + 0.21267913281917572, + -1.5297168493270874, + 0.7376750707626343, + 0.3222302496433258, + 1.5321130752563477, + 2.268928050994873, + 0.654927134513855, + 0.16686232388019562, + 1.5889588594436646, + -0.2752054035663605, + 0.7417648434638977, + 0.6762262582778931, + 0.5384510159492493, + 2.268928050994873, + 0.4580623507499695, + 0.600703239440918, + 0.11472026258707047 + ], + "min": [ + 0.044551920145750046, + 0.08648021519184113, + -0.26772359013557434, + -2.1069464683532715, + -0.0780634805560112, + -0.37880757451057434, + -0.1574927717447281, + 2.268928050994873, + -0.9419990181922913, + -1.2163200378417969, + -0.25598475337028503, + -2.2165682315826416, + -0.7417607307434082, + -0.6433748602867126, + -1.3031400442123413, + -0.0872664600610733, + -0.06462357938289642, + -0.07802599668502808, + -0.7134205102920532 + ] + }, + "observation.image": { + "mean": [ + [ + [ + 0.5158140063285828 + ] + ], + [ + [ + 0.4342783987522125 + ] + ], + [ + [ + 0.28265076875686646 + ] + ] + ], + "std": [ + [ + [ + 0.282068133354187 + ] + ], + [ + [ + 0.25686660408973694 + ] + ], + [ + [ + 0.24023106694221497 + ] + ] + ], + "max": [ + [ + [ + 1.0 + ] + ], + [ + [ + 1.0 + ] + ], + [ + [ + 1.0 + ] + ] + ], + "min": [ + [ + [ + 0.0 + ] + ], + [ + [ + 0.0 + ] + ], + [ + [ + 0.0 + ] + ] + ] + }, + "timestamp": { + "mean": [ + 4.977675437927246 + ], + "std": [ + 2.883486747741699 + ], + "max": [ + 9.966666221618652 + ], + "min": [ + 0.0 + ] + }, + "frame_index": { + "mean": [ + 149.33050537109375 + ], + "std": [ + 86.5045166015625 + ], + "max": [ + 299.0 + ], + "min": [ + 0.0 + ] + }, + "episode_index": { + 
"mean": [ + 24.497207641601562 + ], + "std": [ + 14.42776870727539 + ], + "max": [ + 2.0 + ], + "min": [ + 0.0 + ] + }, + "index": { + "mean": [ + 7490.99951171875 + ], + "std": [ + 4325.21923828125 + ], + "max": [ + 14982.0 + ], + "min": [ + 0.0 + ] + }, + "task_index": { + "mean": [ + 0.0 + ], + "std": [ + 0.0 + ], + "max": [ + 0.0 + ], + "min": [ + 0.0 + ] + } +} diff --git a/tests/assets/lerobot/apple_storage/meta/tasks.jsonl b/tests/assets/lerobot/apple_storage/meta/tasks.jsonl new file mode 100644 index 000000000000..fe3928200cba --- /dev/null +++ b/tests/assets/lerobot/apple_storage/meta/tasks.jsonl @@ -0,0 +1 @@ +{"task_index": 0, "task": "place the apple in the plate"} diff --git a/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000000.mp4 b/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000000.mp4 new file mode 100644 index 000000000000..221bdc10c4cf --- /dev/null +++ b/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000000.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a0886dfb9c32b0104484859055ca4f5bb1c91f11c33625c847608eae22ac2b +size 8054090 diff --git a/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000001.mp4 b/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000001.mp4 new file mode 100644 index 000000000000..f925a6836c89 --- /dev/null +++ b/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000001.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d2d97bfbf928e5a43ef12b4dcb05cf7b60193ccd0d7dc416c795bc10cdbbda +size 8020897 diff --git a/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000002.mp4 b/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000002.mp4 new file mode 100644 index 000000000000..24d9c0c49162 --- /dev/null +++ 
b/tests/assets/lerobot/apple_storage/videos/chunk-000/observation.image/episode_000002.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c703578d14535eda8f06bb81a414055274fd4250a1b3ffc9476b60828d4d823
+size 7673113
diff --git a/tests/python/release_checklist/check_lerobot_dataloader.py b/tests/python/release_checklist/check_lerobot_dataloader.py
new file mode 100644
index 000000000000..34783ec7096c
--- /dev/null
+++ b/tests/python/release_checklist/check_lerobot_dataloader.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import os
+from argparse import Namespace
+from uuid import uuid4
+
+import rerun as rr
+
+README = """\
+# LeRobot dataloader check
+
+This will load an entire LeRobot dataset -- simply make sure that it does 🙃
+
+The LeRobot dataset loader works by creating a new _recording_ (⚠)️ for each episode in the dataset.
+I.e., you should see a bunch of recordings below this readme (3, to be exact).
+"""
+
+
+def log_readme() -> None:
+    """Log `README` as a static markdown text document on the `readme` entity."""
+    rr.log("readme", rr.TextDocument(README, media_type=rr.MediaType.MARKDOWN), static=True)
+
+
+def run(args: Namespace) -> None:
+    """Print the readme, load the bundled `apple_storage` dataset, and send an auto-layout blueprint."""
+    rr.script_setup(args, os.path.basename(__file__), recording_id=uuid4())
+
+    # NOTE: This dataloader works by creating a new recording for each episode.
+    # Those recordings all share the same application_id though, which means they also share
+    # the same blueprint: we cannot log a readme, or all the recordings would show an empty readme.
+    # log_readme()
+    print(README)
+
+    dataset_path = os.path.dirname(__file__) + "/../../../tests/assets/lerobot/apple_storage"
+    rr.log_file_from_path(dataset_path)
+
+    rr.send_blueprint(rr.blueprint.Blueprint(auto_layout=True, auto_views=True), make_active=True, make_default=True)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Interactive release checklist")
+    rr.script_add_args(parser)
+    args = parser.parse_args()
+    run(args)