diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..7bf231e794542a83b16930baa79926b163ee8d78 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +*.webp filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.bag filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..45f3f48a4b0a745a7b37398ae44b0a8d6fa3f58b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,54 @@ +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# System deps + Python 3.10 + ffmpeg +RUN apt-get update && apt-get install -y --no-install-recommends \ + git git-lfs wget curl ca-certificates \ + libgl1-mesa-glx libglib2.0-0 \ + software-properties-common \ + && add-apt-repository ppa:deadsnakes/ppa -y \ + && add-apt-repository ppa:ubuntuhandbook1/ffmpeg6 -y \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python3.10 python3.10-venv python3.10-dev \ + ffmpeg \ + && git lfs install \ + && rm -rf /var/lib/apt/lists/* + +# Make python3.10 the default +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 + +# Install pip +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python + +# Install PyTorch with CUDA 12.4 support +RUN pip install --no-cache-dir \ + torch torchvision torchaudio \ + --index-url https://download.pytorch.org/whl/cu124 + +# Clone our repo (includes our modified lerobot) +RUN huggingface-cli download StrongRoboticsLab/pi05-so100-diverse \ + --local-dir /workspace/pi05-so100-diverse \ + --exclude "logs/*" + +# Install our modified lerobot + other deps +RUN pip install --no-cache-dir \ + -e /workspace/pi05-so100-diverse/lerobot \ + "transformers==4.54.1" \ + 
"accelerate>=0.34" \ + wandb \ + huggingface_hub \ + hf_xet + +# Install Node.js + Claude Code +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ + && apt-get install -y nodejs \ + && rm -rf /var/lib/apt/lists/* \ + && npm install -g @anthropic-ai/claude-code + +WORKDIR /workspace/pi05-so100-diverse + +ENTRYPOINT ["/bin/bash", "-c"] +CMD ["bash"] diff --git a/bootstrap.sh b/bootstrap.sh new file mode 100755 index 0000000000000000000000000000000000000000..248b0e4d158e46ede921af494d1d99f893f97df6 --- /dev/null +++ b/bootstrap.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# One-click bootstrap: builds Docker image, downloads dataset, starts training. +# Usage: HF_TOKEN=xxx WANDB_API_KEY=xxx bash bootstrap.sh + +set -e + +if [ -z "$HF_TOKEN" ]; then echo "ERROR: export HF_TOKEN first"; exit 1; fi +if [ -z "$WANDB_API_KEY" ]; then echo "ERROR: export WANDB_API_KEY first"; exit 1; fi + +DATASET_DIR="${DATASET_DIR:-/ephemeral/community_dataset_v3}" +REPO_DIR="${REPO_DIR:-/workspace/pi05-so100-diverse}" +NUM_GPUS="${NUM_GPUS:-1}" + +echo "=== Step 1: Clone repo ===" +if [ ! -d "$REPO_DIR" ]; then + git clone https://huggingface.co/StrongRoboticsLab/pi05-so100-diverse "$REPO_DIR" +else + echo "Repo already cloned, skipping" +fi + +echo "=== Step 2: Build Docker image ===" +cd "$REPO_DIR" +if ! docker images pi05-training --format '{{.ID}}' | grep -q .; then + docker build -t pi05-training . 
+else + echo "Image already built, skipping" +fi + +echo "=== Step 3: Preflight checks ===" +docker run --rm --runtime=nvidia \ + -e HF_TOKEN="$HF_TOKEN" \ + pi05-training "bash /workspace/pi05-so100-diverse/preflight.sh" + +if [ "${SKIP_DOWNLOAD:-0}" != "1" ]; then + echo "=== Step 4: Download dataset ===" + mkdir -p "$DATASET_DIR" + docker run --rm \ + -e HF_TOKEN="$HF_TOKEN" \ + -e HF_XET_HIGH_PERFORMANCE=1 \ + -v "$(dirname $DATASET_DIR):$(dirname $DATASET_DIR)" \ + pi05-training "huggingface-cli download \ + --repo-type dataset \ + HuggingFaceVLA/community_dataset_v3 \ + --local-dir $DATASET_DIR \ + --token \$HF_TOKEN" +else + echo "=== Step 4: Skipped (SKIP_DOWNLOAD=1) ===" +fi + +echo "=== Step 5: Start training ===" +docker run --rm --runtime=nvidia \ + --ipc=host \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + -e HF_TOKEN="$HF_TOKEN" \ + -e WANDB_API_KEY="$WANDB_API_KEY" \ + -e NUM_GPUS="$NUM_GPUS" \ + -e DATASET_DIR="$DATASET_DIR" \ + -v /ephemeral:/ephemeral \ + -v "$REPO_DIR:/workspace/pi05-so100-diverse" \ + -e PYTHONPATH="" \ + pi05-training "pip install -q 'transformers>=5.0.0' \ + && SITE_PKG=\$(python3 -c \"import lerobot,os; print(os.path.dirname(lerobot.__path__[0]))\") \ + && bash /workspace/pi05-so100-diverse/lerobot_patches/apply.sh \$SITE_PKG \ + && bash /workspace/pi05-so100-diverse/train_cloud.sh" diff --git a/cleanup_dataset.py b/cleanup_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2e594180dd7214e2851315b1410ab5df60c71f8d --- /dev/null +++ b/cleanup_dataset.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Delete all files from the dataset that aren't in filtered_index.json.""" + +import json +import os +import shutil +import sys +from collections import defaultdict +from pathlib import Path + + +def main(): + index_path = sys.argv[1] if len(sys.argv) > 1 else "filtered_index.json" + dataset_dir = sys.argv[2] if len(sys.argv) > 2 else "/ephemeral/community_dataset_v3" + + with open(index_path) as f: 
+ index = json.load(f) + + # Build set of needed directories (contributor/dataset) + needed_datasets = set() + for ep in index["episodes"]: + needed_datasets.add(ep["dataset"]) + + # Walk the dataset dir and find all contributor/dataset dirs + dataset_root = Path(dataset_dir) + deleted_bytes = 0 + deleted_dirs = 0 + + for contributor_dir in sorted(dataset_root.iterdir()): + if not contributor_dir.is_dir() or contributor_dir.name.startswith("."): + continue + + for ds_dir in sorted(contributor_dir.iterdir()): + if not ds_dir.is_dir(): + continue + + dataset_name = f"{contributor_dir.name}/{ds_dir.name}" + if dataset_name not in needed_datasets: + # Get size before deleting + size = sum(f.stat().st_size for f in ds_dir.rglob("*") if f.is_file()) + shutil.rmtree(ds_dir) + deleted_bytes += size + deleted_dirs += 1 + if deleted_dirs % 50 == 0: + print(f" Deleted {deleted_dirs} datasets, freed {deleted_bytes / 1024**3:.1f}GB", flush=True) + + # Remove empty contributor dirs + if contributor_dir.exists() and not any(contributor_dir.iterdir()): + contributor_dir.rmdir() + + # Also delete the .cache dir + cache_dir = dataset_root / ".cache" + if cache_dir.exists(): + cache_size = sum(f.stat().st_size for f in cache_dir.rglob("*") if f.is_file()) + shutil.rmtree(cache_dir) + deleted_bytes += cache_size + print(f" Deleted .cache ({cache_size / 1024**3:.1f}GB)") + + print(f"\nDone: deleted {deleted_dirs} unused datasets, freed {deleted_bytes / 1024**3:.1f}GB") + + +if __name__ == "__main__": + main() diff --git a/download_dataset.sh b/download_dataset.sh new file mode 100644 index 0000000000000000000000000000000000000000..364bee15c96b0c138ba76da0d66d11bee5cd801f --- /dev/null +++ b/download_dataset.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Download the full community_dataset_v3 using hfd (aria2c-based, resolver-only, no API rate limit issues) +set -e + +if [ -z "$HF_TOKEN" ]; then echo "ERROR: export HF_TOKEN first"; exit 1; fi 
+DATASET_DIR="${DATASET_DIR:-/ephemeral/community_dataset_v3}" + +# Install hfd if not present +if [ ! -f /usr/local/bin/hfd ]; then + wget -q https://gist.githubusercontent.com/padeoe/697678ab8e528b85a2a7bddafea1fa4f/raw/hfd.sh -O /usr/local/bin/hfd + chmod +x /usr/local/bin/hfd +fi + +echo "Downloading dataset to $DATASET_DIR..." +echo "Using aria2c with 4 threads per file, 5 concurrent downloads" + +hfd HuggingFaceVLA/community_dataset_v3 \ + --dataset \ + --hf_token "$HF_TOKEN" \ + --tool aria2c \ + -x 4 \ + -j 5 \ + --local-dir "$DATASET_DIR" + +echo "Download complete!" diff --git a/download_subset.py b/download_subset.py new file mode 100644 index 0000000000000000000000000000000000000000..47eb8e3d211d914123cb3f5d217072288c688e30 --- /dev/null +++ b/download_subset.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +""" +Download only the files needed for training (defined by filtered_index.json) +from HuggingFaceVLA/community_dataset_v3. + +Rate-limit-aware greedy scheduler: downloads small files when we have rate limit +headroom, swaps to large files (videos) when approaching the limit to keep +bandwidth busy while the window recovers. Goal: never actually hit a 429. 
+""" + +import argparse +import json +import os +import sys +import time +import threading +from collections import defaultdict, deque +from concurrent.futures import ThreadPoolExecutor, as_completed, Future + +from huggingface_hub import hf_hub_download + +RATE_LIMIT = 100 # 1000 actual / ~10 API calls per hf_hub_download +RATE_WINDOW = 300 # 5 minutes + + +class RateLimitTracker: + """Sliding window request counter.""" + def __init__(self): + self.lock = threading.Lock() + self.timestamps: deque[float] = deque() + + def record(self): + now = time.time() + with self.lock: + self.timestamps.append(now) + self._prune(now) + + def count(self) -> int: + now = time.time() + with self.lock: + self._prune(now) + return len(self.timestamps) + + def headroom(self) -> int: + """How many more requests we can make in this window.""" + return max(0, RATE_LIMIT - self.count()) + + def wait_if_needed(self): + """If we've exhausted the window, sleep until oldest request expires.""" + while self.headroom() <= 0: + with self.lock: + if self.timestamps: + wait = RATE_WINDOW - (time.time() - self.timestamps[0]) + 1 + if wait > 0: + print(f" Rate limit reached, waiting {wait:.0f}s...", flush=True) + # Release lock while sleeping + else: + wait = 0 + if wait > 0: + time.sleep(wait) + + def _prune(self, now): + cutoff = now - RATE_WINDOW + while self.timestamps and self.timestamps[0] < cutoff: + self.timestamps.popleft() + + +class FileQueue: + """Thread-safe queue that serves small or large files on demand.""" + def __init__(self, small_files: list[str], large_files: list[str]): + self.lock = threading.Lock() + self.small = deque(small_files) + self.large = deque(large_files) + self.total = len(small_files) + len(large_files) + + def get(self, prefer_small: bool) -> str | None: + with self.lock: + if prefer_small and self.small: + return self.small.popleft() + elif self.large: + return self.large.popleft() + elif self.small: + return self.small.popleft() + return None + + def 
remaining(self) -> int: + with self.lock: + return len(self.small) + len(self.large) + + def small_remaining(self) -> int: + with self.lock: + return len(self.small) + + def large_remaining(self) -> int: + with self.lock: + return len(self.large) + + +def build_file_lists(index_path: str, output_dir: str) -> tuple[list[str], list[str], int]: + """Returns (small_files, large_files, skipped_count) from filtered_index.json. + Skips files already on disk.""" + with open(index_path) as f: + index = json.load(f) + + datasets = defaultdict(list) + for ep in index["episodes"]: + datasets[ep["dataset"]].append(ep["episode_index"]) + + small = [] + large = [] + skipped = 0 + + def add_if_missing(filepath, target_list): + nonlocal skipped + if os.path.exists(os.path.join(output_dir, filepath)): + skipped += 1 + else: + target_list.append(filepath) + + for dataset_name, episode_indices in datasets.items(): + prefix = dataset_name + add_if_missing(f"{prefix}/meta/info.json", small) + add_if_missing(f"{prefix}/meta/tasks.jsonl", small) + add_if_missing(f"{prefix}/meta/episodes.jsonl", small) + + for ep_idx in episode_indices: + ep_str = f"episode_{ep_idx:06d}" + add_if_missing(f"{prefix}/data/chunk-000/{ep_str}.parquet", small) + add_if_missing(f"{prefix}/videos/chunk-000/observation.images.image/{ep_str}.mp4", large) + add_if_missing(f"{prefix}/videos/chunk-000/observation.images.image2/{ep_str}.mp4", large) + + return small, large, skipped + + +# Shared state +tracker = RateLimitTracker() +queue: FileQueue = None +stats_lock = threading.Lock() +downloaded = 0 +total_bytes = 0 +failed = [] +start_time = 0 + +# When headroom drops below this, prefer large files +HEADROOM_THRESHOLD = 50 + + +def worker(output_dir, token): + """Worker loop: grab a file based on rate limit state, download it, repeat.""" + global downloaded, total_bytes + + while True: + headroom = tracker.headroom() + prefer_small = headroom > HEADROOM_THRESHOLD + + filepath = queue.get(prefer_small) + if filepath 
is None: + return + + for attempt in range(10): + tracker.wait_if_needed() + tracker.record() + try: + path = hf_hub_download( + repo_id="HuggingFaceVLA/community_dataset_v3", + repo_type="dataset", + filename=filepath, + local_dir=output_dir, + token=token, + ) + size = os.path.getsize(path) + with stats_lock: + downloaded += 1 + total_bytes += size + _maybe_log() + break + except Exception as e: + if "429" in str(e) and attempt < 9: + time.sleep(30 * (attempt + 1)) + continue + with stats_lock: + failed.append((filepath, str(e))) + _maybe_log() + break + + +def _maybe_log(): + """Log progress every 100 files. Must be called with stats_lock held.""" + total = downloaded + len(failed) + if total % 100 == 0 and total > 0: + elapsed = time.time() - start_time + rate = total / elapsed if elapsed > 0 else 0 + mb_s = (total_bytes / 1024 / 1024) / elapsed if elapsed > 0 else 0 + gb_done = total_bytes / 1024 / 1024 / 1024 + headroom = tracker.headroom() + remaining = queue.remaining() + est_min = remaining / rate / 60 if rate > 0 else 0 + print(f" [{total}/{queue.total}] {gb_done:.1f}GB, " + f"{mb_s:.0f} MB/s, {rate:.1f} files/s, " + f"headroom: {headroom}/{RATE_LIMIT}, " + f"queued: {queue.small_remaining()}s+{queue.large_remaining()}L, " + f"~{est_min:.0f}min left", flush=True) + + +def main(): + global queue, start_time + + parser = argparse.ArgumentParser(description="Download training subset from community_dataset_v3") + parser.add_argument("--index", type=str, default="filtered_index.json") + parser.add_argument("--output", type=str, default="/data/community_dataset_v3") + parser.add_argument("--token", type=str, default=os.environ.get("HF_TOKEN")) + parser.add_argument("--workers", type=int, default=8) + args = parser.parse_args() + + if not args.token: + print("ERROR: Set HF_TOKEN or pass --token") + return + + small, large, skipped = build_file_lists(args.index, args.output) + queue = FileQueue(small, large) + + print(f"Files to download: {queue.total} ({skipped} 
already on disk, skipped)") + print(f" Small (metadata+parquets): {len(small)}") + print(f" Large (videos): {len(large)}") + print(f" Workers: {args.workers}") + print(f" Rate limit: {RATE_LIMIT}/{RATE_WINDOW}s, " + f"swap to large files at <{HEADROOM_THRESHOLD} headroom") + print() + + start_time = time.time() + + with ThreadPoolExecutor(max_workers=args.workers) as pool: + futures = [pool.submit(worker, args.output, args.token) + for _ in range(args.workers)] + for f in futures: + f.result() + + elapsed = time.time() - start_time + gb_total = total_bytes / 1024 / 1024 / 1024 + print(f"\nDone in {elapsed/60:.1f} min: {downloaded} files, " + f"{gb_total:.1f}GB, {len(failed)} failed") + if failed: + print("Failed files:") + for f, err in failed[:20]: + print(f" {f}: {err}") + if len(failed) > 20: + print(f" ... and {len(failed) - 20} more") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/lerobot/.dockerignore b/lerobot/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..c0d8a84b566aef1f4bf9e7009875decfccfc0125 --- /dev/null +++ b/lerobot/.dockerignore @@ -0,0 +1,160 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Misc +.git +tmp +wandb +data +outputs +.vscode +rl +media + + +# Logging +logs + +# HPC +nautilus/*.yaml +*.key + +# Slurm +sbatch*.sh + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +!tests/artifacts +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Ignore .cache except calibration +.cache/* +!.cache/calibration/ +!.cache/calibration/** + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/lerobot/.gitattributes b/lerobot/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..7d89f37b2bf0c1443508914a62697c1c69f8920f --- /dev/null +++ b/lerobot/.gitattributes @@ -0,0 +1,21 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*.memmap filter=lfs diff=lfs merge=lfs -text +*.stl filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.mp4 filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.json !text !filter !merge !diff +tests/artifacts/cameras/*.png filter=lfs diff=lfs merge=lfs -text +*.bag filter=lfs diff=lfs merge=lfs -text diff --git a/lerobot/.github/ISSUE_TEMPLATE/bug-report.yml b/lerobot/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f602de30960a5ddaed9ed04272a2981718c1f2a --- /dev/null +++ b/lerobot/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,94 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: "🚀 Issue / Bug / Request" +description: Report a bug, suggest an improvement, or ask a technical question. +body: + - type: markdown + attributes: + value: | + ### Thanks for contributing to LeRobot! 🙌 + Please choose the most relevant sections below. If this is a general "how-to" question, consider our [Discord](https://discord.gg/s3KuuzsPFb) for faster community support. + + - type: dropdown + id: issue-type + attributes: + label: Ticket Type + description: What kind of ticket are you opening? + options: + - "🐛 Bug Report (Something isn't working)" + - "💡 Feature Request / Improvement" + - "❓ Technical Question" + - "🧹 Maintenance / Documentation" + validations: + required: true + + - type: textarea + id: system-info + attributes: + label: Environment & System Info + description: | + For bugs or technical questions, please run `lerobot-info` and paste the output. + (Optional for feature requests). + render: Shell + placeholder: lerobot version, OS, python version, etc. + + - type: textarea + id: description + validations: + required: true + attributes: + label: Description + description: | + Provide a clear summary of the issue or your proposal. + - **Bugs:** What is happening? + - **Features:** What is the goal/use case? + - **Questions:** What are you trying to achieve? + placeholder: | + A clear and concise description of the issue or suggestion. 
+ + - type: textarea + id: context-repro + attributes: + label: Context & Reproduction + description: | + Provide a code snippet, steps to reproduce a bug, or technical details about your proposal. + Please use code blocks for scripts and CLI commands. + placeholder: | + Steps to reproduce / Usage example: + 1. + 2. + 3. + + - type: textarea + id: logs + attributes: + label: Relevant logs or stack trace + description: If applicable, paste relevant error logs here. + render: Shell + + - type: checkboxes + id: extras + attributes: + label: Checklist + options: + - label: I have searched existing tickets to ensure this isn't a duplicate. + - label: I am using the latest version of the `main` branch. + - label: I have verified this is not an environment-specific problem. + + - type: textarea + id: workaround + attributes: + label: Additional Info / Workarounds + description: Anything else we should know? If you have a workaround, please share it! diff --git a/lerobot/.github/PULL_REQUEST_TEMPLATE.md b/lerobot/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..43e2442d323059f04a23eaf181ab18750da08acf --- /dev/null +++ b/lerobot/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,55 @@ +## Title + +Short, imperative summary (e.g., "fix(robots): handle None in sensor parser"). See [CONTRIBUTING.md](../CONTRIBUTING.md) for PR conventions. + +## Type / Scope + +- **Type**: (Bug | Feature | Docs | Performance | Test | CI | Chore) +- **Scope**: (optional — name of module or package affected) + +## Summary / Motivation + +- One-paragraph description of what changes and why. +- Why this change is needed and any trade-offs or design notes. + +## Related issues + +- Fixes / Closes: # (if any) +- Related: # (if any) + +## What changed + +- Short, concrete bullets of the modifications (files/behaviour). +- Short note if this introduces breaking changes and migration steps. 
+ +## How was this tested (or how to run locally) + +- Tests added: list new tests or test files. +- Manual checks / dataset runs performed. +- Instructions for the reviewer + +Example: + +- Ran the relevant tests: + + ```bash + pytest -q tests/ -k + ``` + +- Reproduce with a quick example or CLI (if applicable): + + ```bash + lerobot-train --some.option=true + ``` + +## Checklist (required before merge) + +- [ ] Linting/formatting run (`pre-commit run -a`) +- [ ] All tests pass locally (`pytest`) +- [ ] Documentation updated +- [ ] CI is green + +## Reviewer notes + +- Anything the reviewer should focus on (performance, edge-cases, specific files) or general notes. +- Anyone in the community is free to review the PR. diff --git a/lerobot/.github/labeler.yml b/lerobot/.github/labeler.yml new file mode 100644 index 0000000000000000000000000000000000000000..d3c5cc622c89e8260df64353e55bdc1f9af9ade8 --- /dev/null +++ b/lerobot/.github/labeler.yml @@ -0,0 +1,69 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +CI: + - changed-files: + - any-glob-to-any-file: + - '.github/**' + - 'docker/**' + +github_actions: + - changed-files: + - any-glob-to-any-file: '.github/**' + +documentation: + - changed-files: + - any-glob-to-any-file: + - '**/*.md' + - '**/*.mdx' + - 'docs/**' + +examples: + - changed-files: + - any-glob-to-any-file: 'examples/**' + +tests: + - changed-files: + - any-glob-to-any-file: 'tests/**' + +sensors: + - changed-files: + - any-glob-to-any-file: 'src/lerobot/cameras/**' + +configuration: + - changed-files: + - any-glob-to-any-file: 'src/lerobot/configs/**' + +dataset: + - changed-files: + - any-glob-to-any-file: 'src/lerobot/datasets/**' + +evaluation: + - changed-files: + - any-glob-to-any-file: 'src/lerobot/envs/**' + +robots: + - changed-files: + - any-glob-to-any-file: + - 'src/lerobot/teleoperators/**' + - 'src/lerobot/robots/**' + - 'src/lerobot/motors/**' + +policies: + - changed-files: + - any-glob-to-any-file: 'src/lerobot/policies/**' + +processor: + - changed-files: + - any-glob-to-any-file: 'src/lerobot/processor/**' diff --git a/lerobot/.github/workflows/documentation-upload-pr.yml b/lerobot/.github/workflows/documentation-upload-pr.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ee2a5caad0618009ec49156d67844ded9dc29ba --- /dev/null +++ b/lerobot/.github/workflows/documentation-upload-pr.yml @@ -0,0 +1,41 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow uploads the documentation preview built for a PR and comments the link on the PR. +name: Documentation PR Upload +permissions: + contents: read + pull-requests: write + +on: + # Triggered by the completion of the main 'Documentation' workflow. + workflow_run: # zizmor: ignore[dangerous-triggers] We follow the same pattern as in Transformers + workflows: ["Documentation"] + types: + - completed + +jobs: + # This job uploads a preview of the documentation for a pull request. + upload_and_comment: + name: Upload Preview and Comment + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' && + github.repository == 'huggingface/lerobot' + uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main + with: + package_name: lerobot + secrets: + hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} + comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} diff --git a/lerobot/.github/workflows/documentation.yml b/lerobot/.github/workflows/documentation.yml new file mode 100644 index 0000000000000000000000000000000000000000..c7926c54233baddd2d29685e777332bbf07db747 --- /dev/null +++ b/lerobot/.github/workflows/documentation.yml @@ -0,0 +1,86 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This workflow handles building documentation for both main branches and PRs. +name: Documentation + +on: + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + inputs: + version: + description: 'Version tag (e.g. v0.1.2) - Leave empty for standard main build' + required: false + type: string + + # Triggers the workflow on push events to main for the docs folder + push: + branches: + - main + paths: + - "docs/**" + + # Triggers the workflow on pull request events targeting main for the docs folder + pull_request: + branches: + - main + paths: + - "docs/**" + + release: + types: [published] + +# Ensures that only the latest commit for a PR or branch is built, canceling older runs. +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + # This job builds and deploys the official documentation. + build_main_docs: + name: Build Main Docs + if: > + (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'release') && + github.repository == 'huggingface/lerobot' + permissions: + contents: read + uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main + with: + commit_sha: ${{ github.sha }} + package: lerobot + additional_args: >- + --not_python_module + ${{ + (github.event_name == 'release' && format('--version {0}', github.event.release.tag_name)) || + (inputs.version != '' && format('--version {0}', inputs.version)) || + '' + }} + secrets: + token: ${{ secrets.HUGGINGFACE_PUSH }} + hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} + + # This job builds a preview of the documentation for a pull request. + # The result of this job triggers the 'Upload PR Documentation' workflow. 
+ build_pr_docs: + name: Build PR Docs + if: github.event_name == 'pull_request' && github.repository == 'huggingface/lerobot' + permissions: + contents: read + pull-requests: write + uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main + with: + commit_sha: ${{ github.event.pull_request.head.sha }} + pr_number: ${{ github.event.number }} + package: lerobot + additional_args: --not_python_module diff --git a/lerobot/.github/workflows/fast_tests.yml b/lerobot/.github/workflows/fast_tests.yml new file mode 100644 index 0000000000000000000000000000000000000000..fc169e25341163a395d95aeecfc8a9ec59b643ff --- /dev/null +++ b/lerobot/.github/workflows/fast_tests.yml @@ -0,0 +1,100 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow handles fast testing. +name: Fast Tests + +on: + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + + pull_request: + branches: + - main + paths: + - "src/**" + - "tests/**" + - ".github/workflows/**" + - "pyproject.toml" + - "Makefile" + push: + branches: + - main + paths: + - "src/**" + - "tests/**" + - ".github/workflows/**" + - "pyproject.toml" + - "Makefile" + +permissions: + contents: read + +# Sets up the environment variables +env: + UV_VERSION: "0.8.0" + PYTHON_VERSION: "3.12" + +# Ensures that only the latest commit for a PR or branch is built, canceling older runs. 
+concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + # This job runs pytests with the default dependencies. + # It runs everytime we commit to a PR or push to main + fast-pytest-tests: + name: Fast Pytest Tests + runs-on: ubuntu-latest + env: + MUJOCO_GL: egl + HF_HOME: /mnt/cache/.cache/huggingface + HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + steps: + - uses: actions/checkout@v6 + with: + persist-credentials: false + lfs: true + + # NOTE(Steven): Mount to `/mnt` to avoid the limited storage on `/home`. Consider cleaning default SDKs or using self-hosted runners for more space. + # (As of 2024-06-10, the runner's `/home` has only 6.2 GB free—8% of its 72 GB total.) + - name: Setup /mnt storage + run: sudo chown -R $USER:$USER /mnt + + # TODO(Steven): Evaluate the need of these dependencies + - name: Install apt dependencies + run: | + sudo apt-get update && sudo apt-get install -y build-essential git \ + curl libglib2.0-0 libegl1-mesa-dev ffmpeg \ + libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev + + - name: Setup uv and Python + uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses] + with: + enable-cache: true + version: ${{ env.UV_VERSION }} + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install lerobot with test extras + run: uv sync --extra "test" + + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + uv run hf auth whoami + + - name: Run pytest + run: uv run pytest tests -vv --maxfail=10 diff --git a/lerobot/.github/workflows/full_tests.yml b/lerobot/.github/workflows/full_tests.yml new file mode 100644 index 0000000000000000000000000000000000000000..8b7d28123fb9d1ab6641588c77d6994b3b13d5bb --- /dev/null +++ b/lerobot/.github/workflows/full_tests.yml @@ -0,0 +1,237 @@ +# Copyright 2025 The HuggingFace Inc. 
team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow handles full testing. +name: Full Tests + +on: + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + + pull_request_review: + types: [submitted] + push: + branches: + - main + paths: + - "src/**" + - "tests/**" + - ".github/workflows/**" + - "pyproject.toml" + - "Makefile" + +permissions: + contents: read + +# Sets up the environment variables +env: + UV_VERSION: "0.8.0" + PYTHON_VERSION: "3.12" + DOCKER_IMAGE_NAME: huggingface/lerobot-gpu + +# Ensures that only the latest action is built, canceling older runs. +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + + # This job runs the E2E tests + pytest with all extras + # It runs everytime a PR is approved or a push to main + full-tests: + name: Full Tests + runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request_review' && github.event.review.state == 'approved') || + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' + env: + MUJOCO_GL: egl + HF_HOME: /mnt/cache/.cache/huggingface + HF_LEROBOT_HOME: /mnt/cache/.cache/huggingface/lerobot + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + steps: + - uses: actions/checkout@v6 + with: + lfs: true + persist-credentials: false + + # NOTE(Steven): Mount to `/mnt` to avoid the limited storage on `/home`. 
Consider cleaning default SDKs or using self-hosted runners for more space. + # (As of 2024-06-10, the runner's `/home` has only 6.2 GB free—8% of its 72 GB total.) + - name: Setup /mnt storage + run: sudo chown -R $USER:$USER /mnt + + - name: Install apt dependencies + run: | + sudo apt-get update && sudo apt-get install -y build-essential \ + git curl libglib2.0-0 libegl1-mesa-dev ffmpeg libusb-1.0-0-dev \ + speech-dispatcher libgeos-dev portaudio19-dev + + - name: Setup uv and Python + uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses] + with: + enable-cache: true + version: ${{ env.UV_VERSION }} + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install lerobot with all extras + run: uv sync --extra all # TODO(Steven): Make flash-attn optional + + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + uv run hf auth whoami + + - name: Run pytest (all extras) + run: uv run pytest tests -vv --maxfail=10 + + - name: Run end-to-end tests + run: uv run make test-end-to-end + + # This job builds a GPU enabled image for testing + # It runs everytime a PR is approved or a push to main + # TODO(Steven): For now we skip this job for community PRs + build-and-push-docker: + name: Build and Push Docker + runs-on: + group: aws-general-8-plus + if: | + github.repository == 'huggingface/lerobot' && ( + (github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && github.event.pull_request.head.repo.fork == false) || + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' + ) + outputs: + image_tag: ${{ steps.set_tag.outputs.image_tag }} + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + GITHUB_REF: ${{ github.ref }} + GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }} + steps: + - name: Set Docker image tag + id: set_tag + run: | + if [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then + TAG="${DOCKER_IMAGE_NAME}:latest" + 
elif [[ -n "${GITHUB_PR_NUMBER}" ]]; then + TAG="${DOCKER_IMAGE_NAME}:pr-${GITHUB_PR_NUMBER}" + else + TAG="${DOCKER_IMAGE_NAME}:pr-${GITHUB_REF##*/}" + fi + echo "image_tag=$TAG" >> $GITHUB_OUTPUT + - name: Install Git LFS + run: | + sudo apt-get update + sudo apt-get install git-lfs + git lfs install + - uses: actions/checkout@v6 + with: + lfs: true + persist-credentials: false + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + - name: Login to Docker Hub + uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses] + with: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + - name: Build and push Docker image + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . + file: ./docker/Dockerfile.internal + push: true + tags: ${{ steps.set_tag.outputs.image_tag }} + + # This job runs pytest with all extras in a GPU enabled host + # It runs everytime a test image is created + gpu-tests: + name: GPU Tests + needs: [build-and-push-docker] + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_HOME: /home/user_lerobot/.cache/huggingface + HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot + TORCH_HOME: /home/user_lerobot/.cache/torch + TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + container: + image: ${{ needs.build-and-push-docker.outputs.image_tag }} # zizmor: ignore[unpinned-images] + options: --gpus all --shm-size "16gb" + credentials: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + defaults: + run: + shell: bash + working-directory: /lerobot + steps: + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + hf auth whoami + - name: Fix ptxas permissions + run: chmod +x 
/lerobot/.venv/lib/python3.12/site-packages/triton/backends/nvidia/bin/ptxas + - name: Run pytest on GPU + run: pytest tests -vv --maxfail=10 + - name: Run end-to-end tests + run: make test-end-to-end + + # This job deletes the test image recently created + # It runs everytime after the gpu-tests have finished + delete-pr-image: + name: Delete PR Image + needs: [gpu-tests, build-and-push-docker] + if: always() && ((github.event.review.state == 'approved') || (github.event_name == 'workflow_dispatch')) && needs.build-and-push-docker.result == 'success' + runs-on: ubuntu-latest + steps: + - name: Get Docker Hub Token and Delete Image + # zizmor: ignore[template-injection] + env: + DOCKERHUB_LEROBOT_USERNAME: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + DOCKERHUB_LEROBOT_PASSWORD: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + IMAGE_FULL: ${{ needs.build-and-push-docker.outputs.image_tag }} + run: | + IMAGE_NAME=$(echo "$IMAGE_FULL" | cut -d':' -f1) + IMAGE_TAG=$(echo "$IMAGE_FULL" | cut -d':' -f2-) + echo "Attempting to delete image: $IMAGE_NAME:$IMAGE_TAG" + + TOKEN=$(curl -s -H "Content-Type: application/json" \ + -X POST \ + -d "{\"username\": \"$DOCKERHUB_LEROBOT_USERNAME\", \"password\": \"$DOCKERHUB_LEROBOT_PASSWORD\"}" \ + https://hub.docker.com/v2/users/login/ | jq -r .token) + + if [ "$TOKEN" == "null" ] || [ -z "$TOKEN" ]; then + echo "::error::Failed to get Docker Hub token." + exit 1 + fi + + HTTP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: JWT ${TOKEN}" \ + -X DELETE \ + https://hub.docker.com/v2/repositories/${IMAGE_NAME}/tags/$IMAGE_TAG) + + if [ "$HTTP_RESPONSE" -eq 204 ]; then + echo "Successfully deleted Docker image tag: $IMAGE_NAME:$IMAGE_TAG" + else + echo "::error::Failed to delete Docker image. 
HTTP status: $HTTP_RESPONSE" + exit 1 + fi + +# TODO(Steven): Check docker images pull in ubuntu diff --git a/lerobot/.github/workflows/issue_labeler.yml b/lerobot/.github/workflows/issue_labeler.yml new file mode 100644 index 0000000000000000000000000000000000000000..438184e3f91ae41d2bed7c2544e63d4a780cf4c3 --- /dev/null +++ b/lerobot/.github/workflows/issue_labeler.yml @@ -0,0 +1,77 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow automatically labels issues based on their content. 
+name: Issue Labeler +on: + # Trigger on new issues and edits to existing issues + issues: + types: [opened, edited] + +permissions: + contents: read + issues: write + +jobs: + label-issue: + name: Auto Label Issue + runs-on: ubuntu-latest + if: github.repository == 'huggingface/lerobot' + steps: + - uses: actions/github-script@v8 + with: + script: | + // Setup Input Text + const body = (context.payload.issue.body || ''); + const title = (context.payload.issue.title || ''); + const cleanBody = body.replace(/```[\s\S]*?```/g, ''); + const text = `${title}\n${cleanBody}`.toLowerCase(); + const labelsToAdd = new Set(); + const matches = (re) => re.test(text); + + // Keyword Heuristics + + if (matches(/\b(bug|error|crash|exception)\b/i)) labelsToAdd.add('bug'); + if (matches(/\b(new feature|enhancement|improvement|proposal|feature request)\b/i)) labelsToAdd.add('enhancement'); + if (matches(/\b(question|how to|clarify|explain|how do i|help me|question about)\b/i)) labelsToAdd.add('question'); + if (matches(/\b(documentation|docs?|readme|tutorial|wiki|typo|docstring)\b/i)) labelsToAdd.add('documentation'); + if (matches(/\b(example|sample|demo|notebook)s?\b/i)) labelsToAdd.add('examples'); + if (matches(/\b(datasets?|data loader|data augmentation|data preprocessing)\b/i)) labelsToAdd.add('dataset'); + if (matches(/\b(mujoco|isaac|simulation|sim)\b/i)) labelsToAdd.add('simulation'); + if (matches(/\b(train|training|optimizer|gradient|wandb|sac)\b/i)) labelsToAdd.add('training'); + if (matches(/\b(rerun|plot|render|rendering|visualizer)/i)) labelsToAdd.add('visualization'); + if (matches(/\b(cameras?|opencv|realsense|lidars?|sensors?|imus?|microphones?|rgbd|encoders?)\b/i)) labelsToAdd.add('sensors'); + if (matches(/\b(urdf|actuators?|calibration|end-effector|kinematics)\b/i)) labelsToAdd.add('robots'); + if (matches(/\b(teleop|teleoperator|controller|leader|follower|joystick|gamepad)\b/i)) labelsToAdd.add('teleoperators'); + if (matches(/\b(policy|policies|model?)\b/i)) 
labelsToAdd.add('policies'); + if (matches(/\b(processor|pipeline|preprocessor|postprocessor)s?\b/i)) labelsToAdd.add('processor'); + if (matches(/\b(eval|evaluate|evaluation|metrics?|score|benchmarks?)\b/i)) labelsToAdd.add('evaluation'); + if (matches(/\b(tests?|pytest|unittest|failing test)\b/i)) labelsToAdd.add('tests'); + if (matches(/\b(ci|github actions?|github workflows?|gha|docker|pypi)\b/i)) labelsToAdd.add('CI'); + if (matches(/\b(perf|latency|throughput|fps|speed|performance|slow|fast|slower|faster|memory usage)\b/i)) labelsToAdd.add('performance'); + if (matches(/\b(dependency|dependencies|pip|install error|importerror|package not found|pyproject)\b/i)) labelsToAdd.add('dependencies'); + if (matches(/\b(configuration|config|arguments?|input feature|dracuss)\b/i)) labelsToAdd.add('configuration'); + + // Apply Labels + const labels = Array.from(labelsToAdd).filter(Boolean); + + if (labels.length > 0) { + console.log(`Adding labels: ${labels.join(', ')}`); + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + labels, + }); + } diff --git a/lerobot/.github/workflows/nightly.yml b/lerobot/.github/workflows/nightly.yml new file mode 100644 index 0000000000000000000000000000000000000000..5bc86857a8022ec6554d243f5683b6089993f157 --- /dev/null +++ b/lerobot/.github/workflows/nightly.yml @@ -0,0 +1,212 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow handles nightly testing & docker images publishing. +name: Nightly +permissions: + contents: read + +on: + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + + # Runs at 02:00 + schedule: + - cron: "0 2 * * *" + +# Sets up the environment variables +env: + UV_VERSION: "0.8.0" + PYTHON_VERSION: "3.12" + DOCKER_IMAGE_NAME_CPU: huggingface/lerobot-cpu:latest + DOCKER_IMAGE_NAME_GPU: huggingface/lerobot-gpu:latest + +# Ensures that only the latest commit is built, canceling older runs. +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + # This job builds a CPU image for testing & distribution + build-docker-cpu-nightly: + name: Build CPU Docker for Nightly + runs-on: + group: aws-general-8-plus + if: github.repository == 'huggingface/lerobot' + outputs: + image_tag: ${{ env.DOCKER_IMAGE_NAME_CPU }} + steps: + - name: Install Git LFS + run: | + sudo apt-get update + sudo apt-get install git-lfs + git lfs install + - uses: actions/checkout@v6 + with: + lfs: true + persist-credentials: false + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + - name: Login to Docker Hub + uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses] + with: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + - name: Build and push Docker image CPU + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . 
+ file: ./docker/Dockerfile.user + push: true + tags: ${{ env.DOCKER_IMAGE_NAME_CPU }} + + # This job builds a GPU image for testing & distribution + build-docker-gpu-nightly: + name: Build GPU Docker for Nightly + runs-on: + group: aws-general-8-plus + if: github.repository == 'huggingface/lerobot' + outputs: + image_tag: ${{ env.DOCKER_IMAGE_NAME_GPU }} + steps: + - name: Install Git LFS + run: | + sudo apt-get update + sudo apt-get install git-lfs + git lfs install + - uses: actions/checkout@v6 + with: + lfs: true + persist-credentials: false + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + - name: Login to Docker Hub + uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses] + with: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + - name: Build and push Docker image GPU + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + context: . 
+ file: ./docker/Dockerfile.internal + push: true + tags: ${{ env.DOCKER_IMAGE_NAME_GPU }} + + # This job runs the E2E tests + pytest with all extras in the CPU image + nightly-cpu-tests: + name: Nightly CPU Tests + needs: [build-docker-cpu-nightly] + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_HOME: /home/user_lerobot/.cache/huggingface + HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot + TORCH_HOME: /home/user_lerobot/.cache/torch + TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + container: + image: ${{ needs.build-docker-cpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images] + options: --shm-size "16gb" + credentials: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + defaults: + run: + shell: bash + working-directory: /lerobot + steps: + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + hf auth whoami + - name: Run pytest on CPU + run: pytest tests -vv --maxfail=10 + - name: Run end-to-end tests + run: make test-end-to-end + + # This job runs the E2E tests + pytest with all extras in the GPU image + nightly-gpu-tests: + name: Nightly GPU Tests + needs: [build-docker-gpu-nightly] + runs-on: + group: aws-g6-4xlarge-plus + env: + HF_HOME: /home/user_lerobot/.cache/huggingface + HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot + TORCH_HOME: /home/user_lerobot/.cache/torch + TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + container: + image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images] + options: --gpus all --shm-size "16gb" + credentials: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + defaults: + run: + shell: bash + working-directory: /lerobot + steps: + - name: 
Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + hf auth whoami + - name: Run pytest on GPU + run: pytest tests -vv --maxfail=10 + - name: Run end-to-end tests + run: make test-end-to-end + + # This job runs multi-GPU training tests with 4 GPUs + nightly-multi-gpu-tests: + name: Nightly Multi-GPU Tests + needs: [build-docker-gpu-nightly] + runs-on: + group: aws-g4dn-12xlarge # Instance with 4 GPUs + env: + HF_HOME: /home/user_lerobot/.cache/huggingface + HF_LEROBOT_HOME: /home/user_lerobot/.cache/huggingface/lerobot + TORCH_HOME: /home/user_lerobot/.cache/torch + TRITON_CACHE_DIR: /home/user_lerobot/.cache/triton + CUDA_VISIBLE_DEVICES: "0,1,2,3" + HF_USER_TOKEN: ${{ secrets.LEROBOT_HF_USER }} + container: + image: ${{ needs.build-docker-gpu-nightly.outputs.image_tag }} # zizmor: ignore[unpinned-images] + options: --gpus all --shm-size "16gb" + credentials: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + defaults: + run: + shell: bash + working-directory: /lerobot + steps: + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + hf auth whoami + - name: Verify GPU availability + run: | + nvidia-smi + python -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}'); print(f'Number of GPUs: {torch.cuda.device_count()}')" + + - name: Run multi-GPU training tests + run: pytest -vv tests/training/ diff --git a/lerobot/.github/workflows/pr_labeler.yml b/lerobot/.github/workflows/pr_labeler.yml new file mode 100644 index 0000000000000000000000000000000000000000..177c209596d933a660d03bf108de4e3397bbdf10 --- /dev/null +++ b/lerobot/.github/workflows/pr_labeler.yml @@ -0,0 +1,39 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow labels pull requests based on the files that were changed. +name: Pull Request Labeler + +on: + # Allows labeling pull requests when they are opened or updated + # zizmor: ignore[dangerous-triggers] Needed to label PRs from forks + pull_request_target: + branches: + - main + types: [opened, synchronize, reopened, ready_for_review] + +permissions: + contents: read + pull-requests: write + +jobs: + triage: + name: Label PR + runs-on: ubuntu-latest + if: github.repository == 'huggingface/lerobot' && !github.event.pull_request.draft + steps: + - uses: actions/labeler@v6 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + sync-labels: true # Removes labels if files are removed from the PR diff --git a/lerobot/.github/workflows/quality.yml b/lerobot/.github/workflows/quality.yml new file mode 100644 index 0000000000000000000000000000000000000000..a84e9c17ed3771bbe57647bc3ce7f6632f731162 --- /dev/null +++ b/lerobot/.github/workflows/quality.yml @@ -0,0 +1,58 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow handles linting, formatting, and static analysis checks for the codebase. +name: Quality +permissions: + contents: read + +on: + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + + # Triggers the workflow on push events to main + push: + branches: + - main + + # Triggers the workflow on pull request events targeting main + pull_request: + branches: + - main + +# Ensures that only the latest commit for a PR or branch is built, canceling older runs. +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + # This job runs pre-commit hooks to check code style and formatting. + pre-commit-checks: + name: Run Pre-commit Hooks (Lint, Format & Static Analysis) + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Run pre-commit hooks + uses: pre-commit/action@v3.0.1 # zizmor: ignore[unpinned-uses] + with: + extra_args: --all-files --show-diff-on-failure --color=always diff --git a/lerobot/.github/workflows/release.yml b/lerobot/.github/workflows/release.yml new file mode 100644 index 0000000000000000000000000000000000000000..f7bd2be6c565e2030ac297f7e382e76da1c45a3e --- /dev/null +++ b/lerobot/.github/workflows/release.yml @@ -0,0 +1,171 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Create Release and Publish to PyPI + +on: + push: + tags: + - 'v*.*.*' # Trigger on tags like v0.1.0, v1.0.0 + +# Sets up the environment variables +env: + UV_VERSION: "0.8.0" + PYTHON_VERSION: "3.12" + +jobs: + # This job builds the Python package and publishes it to PyPI + build-and-publish: + name: Build and publish Python distributions + runs-on: ubuntu-latest + if: github.repository == 'huggingface/lerobot' + outputs: + version: ${{ steps.extract_info.outputs.tag_version }} + permissions: + contents: write + id-token: write + + steps: + - name: Checkout code + uses: actions/checkout@v6 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Extract Version + id: extract_info + # Extract version from tag (e.g., v0.1.0 -> 0.1.0) + # zizmor: ignore[template-injection] + run: | + VERSION=${{ github.ref_name }} + VERSION_NUMBER=${VERSION#v} + echo "tag_version=$VERSION_NUMBER" >> $GITHUB_OUTPUT + - name: Check if version matches pyproject.toml + if: startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-') + # zizmor: ignore[template-injection] + run: | + TAG_VERSION=${{ steps.extract_info.outputs.tag_version }} + + PYPROJECT_VERSION=$(grep '^version = ' pyproject.toml | awk -F' = ' '{print $2}' | tr -d '"') + + if [[ "$TAG_VERSION" != "$PYPROJECT_VERSION" ]]; then + echo "Error: Tag version ($TAG_VERSION) does not match 
pyproject.toml version ($PYPROJECT_VERSION)." >&2 + exit 1 + else + echo "Tag version matches pyproject.toml version: $TAG_VERSION. Proceeding with release." + fi + + - name: Check if version exists on PyPI + # zizmor: ignore[template-injection] + run: | + NEW_VERSION=${{ steps.extract_info.outputs.tag_version }} + + response=$(curl -s "https://pypi.org/pypi/lerobot/$NEW_VERSION/json") + if echo "$response" | grep -q "message"; then + echo "Version $NEW_VERSION is available on PyPI. Proceeding with release." + else + echo "Error: Version $NEW_VERSION already exists on PyPI. Aborting." + exit 1 + fi + + - name: Install build dependencies + run: python -m pip install build + + - name: Build package + run: python -m build + + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # zizmor: ignore[template-injection] + run: | + gh release create ${{ github.ref_name }} \ + --title "Release ${{ github.ref_name }}" \ + --generate-notes \ + --draft=$([[ "${{ github.ref_name }}" == *-* ]] && echo true || echo false) \ + --prerelease=$([[ "${{ github.ref_name }}" == *-* ]] && echo true || echo false) \ + ./dist/* + + - name: Publish to TestPyPI for pre-releases + # True for tags like 'v0.2.0-rc1' + if: startsWith(github.ref, 'refs/tags/v') && contains(github.ref, '-') + uses: pypa/gh-action-pypi-publish@v1.13.0 # zizmor: ignore[unpinned-uses, use-trusted-publishing] + with: + repository-url: https://test.pypi.org/legacy/ + verbose: true + print-hash: true + + - name: Publish to PyPI + if: startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-') + uses: pypa/gh-action-pypi-publish@v1.13.0 # zizmor: ignore[unpinned-uses, use-trusted-publishing] + with: + verbose: true + print-hash: true + + # This job runs end-to-end tests on the release + test-release: + name: Test Release + needs: [build-and-publish] + runs-on: ubuntu-latest + permissions: + contents: read + env: + MUJOCO_GL: egl + steps: + - uses: actions/checkout@v6 + with: + lfs: 
true + persist-credentials: false + - name: Install apt dependencies + run: | + sudo apt-get update && sudo apt-get install -y build-essential \ + git curl libglib2.0-0 libegl1-mesa-dev ffmpeg libusb-1.0-0-dev \ + speech-dispatcher libgeos-dev portaudio19-dev + - name: Setup uv and Python + uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses] + with: + enable-cache: true # zizmor: ignore[cache-poisoning] + version: ${{ env.UV_VERSION }} + python-version: ${{ env.PYTHON_VERSION }} + - name: Create uv virtual environment + run: uv venv + - name: Install lerobot release + # zizmor: ignore[template-injection] + run: | + VERSION="${{ needs.build-and-publish.outputs.version }}" + if [[ "$VERSION" == *-* ]]; then + BASE_VERSION="${VERSION%%-*}" + echo "Installing pre-release version $BASE_VERSION from TestPyPI..." + uv pip install \ + --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple \ + --index-strategy unsafe-best-match \ + "lerobot[all]==$BASE_VERSION" + else + echo "Installing release version $VERSION from PyPI..." + uv pip install "lerobot[all]==$VERSION" + fi + - name: Check lerobot version + run: uv run python -c "import lerobot; print(lerobot.__version__)" + + - name: Run end-to-end tests + run: uv run make test-end-to-end + + +# TODO(Steven): Publish draft/pre-release and to test pypi weekly +# TODO(Steven): Separate build and publish job diff --git a/lerobot/.github/workflows/security.yml b/lerobot/.github/workflows/security.yml new file mode 100644 index 0000000000000000000000000000000000000000..50c0c1fc3df05b3dacdcc2b8896c2b98d68ab973 --- /dev/null +++ b/lerobot/.github/workflows/security.yml @@ -0,0 +1,54 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This workflow handles secret scanning using TruffleHog to detect sensitive information in the codebase. +name: Security +permissions: + contents: read + +on: + # Allows running this workflow manually from the Actions tab + workflow_dispatch: + + # Triggers the workflow on push events to main + push: + branches: + - main + + # Triggers the workflow on pull request events targeting main + pull_request: + branches: + - main + +# Ensures that only the latest commit for a PR or branch is built, canceling older runs. +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + # This job runs TruffleHog to scan the full history of the repository for secrets. + trufflehog: + name: Secret Leaks Scan + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 # zizmor: ignore[unpinned-uses] + with: + fetch-depth: 0 + persist-credentials: false + + - name: Secret Scanning + uses: trufflesecurity/trufflehog@v3.90.0 # zizmor: ignore[unpinned-uses] + with: + extra_args: --only-verified diff --git a/lerobot/.github/workflows/stale.yml b/lerobot/.github/workflows/stale.yml new file mode 100644 index 0000000000000000000000000000000000000000..4dc119b5efe5448e88cdc9555db07284cbbf3689 --- /dev/null +++ b/lerobot/.github/workflows/stale.yml @@ -0,0 +1,71 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
CLOSE_ISSUE_MESSAGE: >
    This issue was closed because it has been stalled for 14 days with no activity.
    Feel free to reopen if it is still relevant, or to ping a collaborator if you have any questions.
  CLOSE_PR_MESSAGE: >
    This PR was closed because it has been stalled for 21 days with no activity.
    Feel free to reopen if it is still relevant, or to ping a collaborator if you have any questions.
# This workflow handles full testing with unbound dependency versions.
# Run on the 1st and 15th of every month at 02:00 UTC
+ - name: Setup /mnt storage + run: sudo chown -R $USER:$USER /mnt + + - name: Install apt dependencies + run: | + sudo apt-get update && sudo apt-get install -y build-essential \ + git curl libglib2.0-0 libegl1-mesa-dev ffmpeg libusb-1.0-0-dev \ + speech-dispatcher libgeos-dev portaudio19-dev + + - name: Setup uv and Python + uses: astral-sh/setup-uv@v6 # zizmor: ignore[unpinned-uses] + with: + enable-cache: true + version: ${{ env.UV_VERSION }} + python-version: ${{ env.PYTHON_VERSION }} + + - name: Unbound dependencies + run: | + sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml + echo "Dependencies unbound:" && cat pyproject.toml + + - name: Install lerobot with all extras + run: uv sync --extra all # TODO(Steven): Make flash-attn optional + - name: Login to Hugging Face + if: env.HF_USER_TOKEN != '' + run: | + uv run hf auth login --token "$HF_USER_TOKEN" --add-to-git-credential + uv run hf auth whoami + - name: Run pytest (all extras) + run: uv run pytest tests -vv + + - name: Run end-to-end tests + run: uv run make test-end-to-end + + # This job builds a GPU enabled image for testing + build-and-push-docker: + name: Build and Push Docker + runs-on: + group: aws-general-8-plus + if: github.repository == 'huggingface/lerobot' + outputs: + image_tag: ${{ env.DOCKER_IMAGE_NAME }} + env: + GITHUB_REF: ${{ github.ref }} + steps: + - name: Install Git LFS + run: | + sudo apt-get update + sudo apt-get install git-lfs + git lfs install + - uses: actions/checkout@v6 + with: + lfs: true + persist-credentials: false + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # zizmor: ignore[unpinned-uses] + with: + cache-binary: false + - name: Login to Docker Hub + uses: docker/login-action@v3 # zizmor: ignore[unpinned-uses] + with: + username: ${{ secrets.DOCKERHUB_LEROBOT_USERNAME }} + password: ${{ secrets.DOCKERHUB_LEROBOT_PASSWORD }} + - name: Build and push Docker image + uses: docker/build-push-action@v6 # zizmor: ignore[unpinned-uses] + with: + 
# It runs every time a test image is created
image: $IMAGE_NAME:$IMAGE_TAG" + + TOKEN=$(curl -s -H "Content-Type: application/json" \ + -X POST \ + -d "{\"username\": \"$DOCKERHUB_LEROBOT_USERNAME\", \"password\": \"$DOCKERHUB_LEROBOT_PASSWORD\"}" \ + https://hub.docker.com/v2/users/login/ | jq -r .token) + + if [ "$TOKEN" == "null" ] || [ -z "$TOKEN" ]; then + echo "::error::Failed to get Docker Hub token." + exit 1 + fi + + HTTP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: JWT ${TOKEN}" \ + -X DELETE \ + https://hub.docker.com/v2/repositories/${IMAGE_NAME}/tags/$IMAGE_TAG) + + if [ "$HTTP_RESPONSE" -eq 204 ]; then + echo "Successfully deleted Docker image tag: $IMAGE_NAME:$IMAGE_TAG" + else + echo "::error::Failed to delete Docker image. HTTP status: $HTTP_RESPONSE" + exit 1 + fi diff --git a/lerobot/.gitignore b/lerobot/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..b47e22cbfadc53e203d36f95d11b76b2666ea2ff --- /dev/null +++ b/lerobot/.gitignore @@ -0,0 +1,179 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +### Environments & Dependencies ### +.env +.venv +env/ +venv/ +env.bak/ +venv.bak/ +.python-version +__pypackages__/ +node_modules/ + +# Lock files +poetry.lock +uv.lock +Pipfile.lock + +### Build & Distribution ### +build/ +dist/ +sdist/ +wheels/ +downloads/ +eggs/ +.eggs/ +parts/ +var/ +pip-wheel-metadata/ +share/python-wheels/ +develop-eggs/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +lib/ +lib64/ + +# PyInstaller +*.manifest +*.spec + +### Compiled & Cached Files ### +__pycache__/ +*.py[cod] +*$py.class +*.so +*.sage.py +.cache/ +.ruff_cache/ +.mypy_cache/ +.pyre/ +.pytype/ +cython_debug/ + +### Testing & Coverage ### +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.pytest_cache/ +.hypothesis/ +nosetests.xml +coverage.xml +*.cover +*.py,cover +!tests/artifacts + +### Logs & Temporary Files ### +logs/ +tmp/ +*.log +pip-log.txt +pip-delete-this-directory.txt +celerybeat-schedule +celerybeat.pid + +### IDE & Editor Config ### +# VS Code +.vscode/ +.devcontainer/ + +# JetBrains / PyCharm +.idea/ + +# Spyder +.spyderproject +.spyproject + +# Rope +.ropeproject + +# Vim +*.swp + +# Other +*~ + +### OS Specific ### +# macOS +.DS_Store + +# Windows +Thumbs.db + +### Framework & Tool Specific ### + +.Python + +# Django +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask +instance/ +.webassets-cache + +# Scrapy +.scrapy + +# Jupyter +.ipynb_checkpoints/ +profile_default/ +ipython_config.py + +# Sphinx +docs/_build/ + +# MkDocs +/site + +# PyBuilder +.pybuilder/ +target/ + +# mypy +.dmypy.json +dmypy.json + +### HPC & Slurm ### +nautilus/*.yaml +*.key +sbatch*.sh + +### Miscellaneous ### +# W&B +wandb/ + +# Dev scripts +.dev/ + +# Data folders +data/ +outputs/ + +# Translations +*.mo +*.pot + +# Dev folders +.cache/* +*.stl +*.urdf +*.xml +*.part diff --git a/lerobot/.pre-commit-config.yaml b/lerobot/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dff7416f41d157cd1be5e4bd35f1f68fc21f20f9 --- /dev/null +++ 
b/lerobot/.pre-commit-config.yaml @@ -0,0 +1,108 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +default_language_version: + python: python3.12 + +exclude: "tests/artifacts/.*\\.safetensors$" + +repos: + ##### Meta ##### + - repo: meta + hooks: + - id: check-useless-excludes + - id: check-hooks-apply + + ##### General Code Quality & Formatting ##### + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-added-large-files + args: ['--maxkb=1024'] + - id: debug-statements + - id: check-merge-conflict + - id: check-case-conflict + - id: check-yaml + - id: check-toml + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.1 + hooks: + - id: ruff-format + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + + - repo: https://github.com/adhtruong/mirrors-typos + rev: v1.38.1 + hooks: + - id: typos + args: [--force-exclude] + + - repo: https://github.com/asottile/pyupgrade + rev: v3.21.0 + hooks: + - id: pyupgrade + args: [--py312-plus] + + ##### Markdown Quality ##### + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.6.2 + hooks: + - id: prettier + name: Format Markdown with Prettier + types_or: [markdown, mdx] + args: [--prose-wrap=preserve] + + ##### Security ##### + - repo: https://github.com/gitleaks/gitleaks + rev: v8.28.0 + hooks: + - id: gitleaks + + - repo: 
https://github.com/woodruffw/zizmor-pre-commit + rev: v1.15.2 + hooks: + - id: zizmor + + - repo: https://github.com/PyCQA/bandit + rev: 1.8.6 + hooks: + - id: bandit + args: ["-c", "pyproject.toml"] + additional_dependencies: ["bandit[toml]"] + + # TODO(Steven): Uncomment when ready to use + ##### Static Analysis & Typing ##### + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.19.1 + hooks: + - id: mypy + args: [--config-file=pyproject.toml] + exclude: ^(examples|benchmarks|tests)/ + + ##### Docstring Checks ##### + # - repo: https://github.com/akaihola/darglint2 + # rev: v1.8.2 + # hooks: + # - id: darglint2 + # args: ["--docstring-style", "google", "-v", "2"] + # exclude: ^tests/.*$ + + # - repo: https://github.com/econchick/interrogate + # rev: 1.7.0 + # hooks: + # - id: interrogate + # args: ["-vv", "--config=pyproject.toml"] diff --git a/lerobot/AI_POLICY.md b/lerobot/AI_POLICY.md new file mode 100644 index 0000000000000000000000000000000000000000..272ee8c120072bf326f129c9f48c390c0a7c9e2e --- /dev/null +++ b/lerobot/AI_POLICY.md @@ -0,0 +1,25 @@ +# AI Usage Policy + +The LeRobot project welcomes contributions from everyone, and we have a few guidelines regarding AI usage to ensure high code quality, clear communication, and a healthy open-source ecosystem: + +- **Please disclose significant AI assistance.** If you used AI tools (e.g., Copilot, Claude, Cursor, ChatGPT) to generate a substantial portion of your code or text, let us know in your PR description. Transparency helps us review your changes more effectively. +- **Own your code (The Human-in-the-Loop).** You must fully understand all the changes you are proposing. If you cannot explain what your AI-assisted code does or how it interacts with LeRobot's broader architecture, please take the time to learn and test it before submitting. 
+- **Keep issues and discussions focused.** You are welcome to use AI to help draft issues or PR descriptions, but please review and edit them carefully before posting. AI can often be overly verbose; trimming the noise and getting straight to the point helps our maintainers address your needs faster. + +Our core maintainers also use AI tools to aid their workflows, but they do so while bringing deep contextual knowledge of the LeRobot codebase to validate the output. We ask all contributors to apply that same level of rigor. + +## Remember the Human Maintainers + +Please remember that LeRobot is maintained by a dedicated team of humans. + +Every discussion, issue, and pull request is read and reviewed by real people. While AI tools can generate thousands of lines of code in seconds, reviewing that code still takes human time and energy. Submitting unverified or low-effort AI output puts an unfair burden on our maintainers. + +Today, the quality of the AI output still heavily depends on the developer driving the tool. We ask that you respect our maintainers' time by thoroughly vetting, testing, and refining your submissions. + +## AI is Welcome Here + +LeRobot operates at the cutting edge of AI and robotics, and many of our maintainers actively embrace AI coding assistants as valuable productivity tools. We are a pro-AI project! + +Our reason for having an AI policy is not an anti-AI stance. Rather, it exists to ensure that AI is used to enhance human contributions, not replace them with unverified noise. It's about how the tools are used, not the tools themselves. + +We value the unique human insight you bring to the LeRobot community. Let AI empower your workflow, but always let your own judgment take the wheel. 
diff --git a/lerobot/CODE_OF_CONDUCT.md b/lerobot/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..305ffa276c5c42a15812cb2ddcfd2df3c992acf5 --- /dev/null +++ b/lerobot/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances of + any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair 
corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +feedback@huggingface.co. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. 
git clone https://github.com/<your-username>/lerobot.git
Environment Installation + +Please follow our [Installation Guide](https://huggingface.co/docs/lerobot/installation) for the environment setup & installation from source. + +## Running Tests & Quality Checks + +### Code Style (Pre-commit) + +Install `pre-commit` hooks to run checks automatically before you commit: + +```bash +pre-commit install +``` + +To run checks manually on all files: + +```bash +pre-commit run --all-files +``` + +### Running Tests + +We use `pytest`. First, ensure you have test artifacts by installing **git-lfs**: + +```bash +git lfs install +git lfs pull +``` + +Run the full suite (this may require extras installed): + +```bash +pytest -sv ./tests +``` + +Or run a specific test file during development: + +```bash +pytest -sv tests/test_specific_feature.py +``` + +## Submitting Issues & Pull Requests + +Use the templates for required fields and examples. + +- **Issues:** Follow the [ticket template](https://github.com/huggingface/lerobot/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml). +- **Pull requests:** Rebase on `upstream/main`, use a descriptive branch (don't work on `main`), run `pre-commit` and tests locally, and follow the [PR template](https://github.com/huggingface/lerobot/blob/main/.github/PULL_REQUEST_TEMPLATE.md). + +One member of the LeRobot team will then review your contribution. + +Thank you for contributing to LeRobot! diff --git a/lerobot/LICENSE b/lerobot/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..a603343cdda0228e53d8d9cdeb355f342f1e8f2c --- /dev/null +++ b/lerobot/LICENSE @@ -0,0 +1,507 @@ +Copyright 2024 The Hugging Face team. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +## Some of lerobot's code is derived from Diffusion Policy, which is subject to the following copyright notice: + +MIT License + +Copyright (c) 2023 Columbia Artificial Intelligence and Robotics Lab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +## Some of lerobot's code is derived from FOWM, which is subject to the following copyright notice: + +MIT License + +Copyright (c) 2023 Yunhai Feng + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ + +## Some of lerobot's code is derived from simxarm, which is subject to the following copyright notice: + +MIT License + +Copyright (c) 2023 Nicklas Hansen & Yanjie Ze + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +## Some of lerobot's code is derived from ALOHA, which is subject to the following copyright notice: + +MIT License + +Copyright (c) 2023 Tony Z. Zhao + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +## Some of lerobot's code is derived from DETR, which is subject to the following copyright notice: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020 - present, Facebook, Inc + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/lerobot/MANIFEST.in b/lerobot/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..c1fce3b5a3b8a0848dce7a8ffe7c5d21a9e5d05f --- /dev/null +++ b/lerobot/MANIFEST.in @@ -0,0 +1,3 @@ +include src/lerobot/templates/lerobot_modelcard_template.md +include src/lerobot/datasets/card_template.md +include src/lerobot/envs/metaworld_config.json diff --git a/lerobot/Makefile b/lerobot/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e02f024031b506b275ba58ae41b4080e05fd32e6 --- /dev/null +++ b/lerobot/Makefile @@ -0,0 +1,180 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +.PHONY: tests + +PYTHON_PATH := $(shell which python) + +# If uv is installed and a virtual environment exists, use it +UV_CHECK := $(shell command -v uv) +ifneq ($(UV_CHECK),) + PYTHON_PATH := $(shell .venv/bin/python) +endif + +export PATH := $(dir $(PYTHON_PATH)):$(PATH) + +DEVICE ?= cpu + +build-user: + docker build -f docker/Dockerfile.user -t lerobot-user . + +build-internal: + docker build -f docker/Dockerfile.internal -t lerobot-internal . + +test-end-to-end: + ${MAKE} DEVICE=$(DEVICE) test-act-ete-train + ${MAKE} DEVICE=$(DEVICE) test-act-ete-train-resume + ${MAKE} DEVICE=$(DEVICE) test-act-ete-eval + ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-train + ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval + ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train + ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval + ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-train + ${MAKE} DEVICE=$(DEVICE) test-smolvla-ete-eval + +test-act-ete-train: + lerobot-train \ + --policy.type=act \ + --policy.dim_model=64 \ + --policy.n_action_steps=20 \ + --policy.chunk_size=20 \ + --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ + --env.type=aloha \ + --env.episode_length=5 \ + --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \ + --dataset.image_transforms.enable=true \ + --dataset.episodes="[0]" \ + --batch_size=2 \ + --steps=4 \ + --eval_freq=2 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 \ + --save_freq=2 \ + --save_checkpoint=true \ + --log_freq=1 \ + --wandb.enable=false \ + --output_dir=tests/outputs/act/ + +test-act-ete-train-resume: + lerobot-train \ + --config_path=tests/outputs/act/checkpoints/000002/pretrained_model/train_config.json \ + --resume=true + +test-act-ete-eval: + lerobot-eval \ + --policy.path=tests/outputs/act/checkpoints/000004/pretrained_model \ + --policy.device=$(DEVICE) \ + --env.type=aloha \ + --env.episode_length=5 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 + +test-diffusion-ete-train: + lerobot-train \ + --policy.type=diffusion \ + 
--policy.down_dims='[64,128,256]' \ + --policy.diffusion_step_embed_dim=32 \ + --policy.num_inference_steps=10 \ + --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ + --env.type=pusht \ + --env.episode_length=5 \ + --dataset.repo_id=lerobot/pusht \ + --dataset.image_transforms.enable=true \ + --dataset.episodes="[0]" \ + --batch_size=2 \ + --steps=2 \ + --eval_freq=2 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 \ + --save_checkpoint=true \ + --save_freq=2 \ + --log_freq=1 \ + --wandb.enable=false \ + --output_dir=tests/outputs/diffusion/ + +test-diffusion-ete-eval: + lerobot-eval \ + --policy.path=tests/outputs/diffusion/checkpoints/000002/pretrained_model \ + --policy.device=$(DEVICE) \ + --env.type=pusht \ + --env.episode_length=5 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 + +test-tdmpc-ete-train: + lerobot-train \ + --policy.type=tdmpc \ + --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ + --env.type=pusht \ + --env.episode_length=5 \ + --dataset.repo_id=lerobot/pusht_image \ + --dataset.image_transforms.enable=true \ + --dataset.episodes="[0]" \ + --batch_size=2 \ + --steps=2 \ + --eval_freq=2 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 \ + --save_checkpoint=true \ + --save_freq=2 \ + --log_freq=1 \ + --wandb.enable=false \ + --output_dir=tests/outputs/tdmpc/ + +test-tdmpc-ete-eval: + lerobot-eval \ + --policy.path=tests/outputs/tdmpc/checkpoints/000002/pretrained_model \ + --policy.device=$(DEVICE) \ + --env.type=pusht \ + --env.episode_length=5 \ + --env.observation_height=96 \ + --env.observation_width=96 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 + + +test-smolvla-ete-train: + lerobot-train \ + --policy.type=smolvla \ + --policy.n_action_steps=20 \ + --policy.chunk_size=20 \ + --policy.device=$(DEVICE) \ + --policy.push_to_hub=false \ + --env.type=aloha \ + --env.episode_length=5 \ + --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \ + --dataset.image_transforms.enable=true \ + --dataset.episodes="[0]" \ + 
--batch_size=2 \ + --steps=4 \ + --eval_freq=2 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 \ + --save_freq=2 \ + --save_checkpoint=true \ + --log_freq=1 \ + --wandb.enable=false \ + --output_dir=tests/outputs/smolvla/ + +test-smolvla-ete-eval: + lerobot-eval \ + --policy.path=tests/outputs/smolvla/checkpoints/000004/pretrained_model \ + --policy.device=$(DEVICE) \ + --env.type=aloha \ + --env.episode_length=5 \ + --eval.n_episodes=1 \ + --eval.batch_size=1 diff --git a/lerobot/README.md b/lerobot/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f58b337b30286ca5865e727659c0ba2114674a92 --- /dev/null +++ b/lerobot/README.md @@ -0,0 +1,176 @@ +

+ LeRobot, Hugging Face Robotics Library +

+ +
+ +[![Tests](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml/badge.svg?branch=main)](https://github.com/huggingface/lerobot/actions/workflows/nightly.yml?query=branch%3Amain) +[![Python versions](https://img.shields.io/pypi/pyversions/lerobot)](https://www.python.org/downloads/) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/huggingface/lerobot/blob/main/LICENSE) +[![Status](https://img.shields.io/pypi/status/lerobot)](https://pypi.org/project/lerobot/) +[![Version](https://img.shields.io/pypi/v/lerobot)](https://pypi.org/project/lerobot/) +[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.1-ff69b4.svg)](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md) +[![Discord](https://img.shields.io/badge/Discord-Join_Us-5865F2?style=flat&logo=discord&logoColor=white)](https://discord.gg/q8Dzzpym3f) + +
+ +**LeRobot** aims to provide models, datasets, and tools for real-world robotics in PyTorch. The goal is to lower the barrier to entry so that everyone can contribute to and benefit from shared datasets and pretrained models. + +🤗 A hardware-agnostic, Python-native interface that standardizes control across diverse platforms, from low-cost arms (SO-100) to humanoids. + +🤗 A standardized, scalable LeRobotDataset format (Parquet + MP4 or images) hosted on the Hugging Face Hub, enabling efficient storage, streaming and visualization of massive robotic datasets. + +🤗 State-of-the-art policies that have been shown to transfer to the real-world ready for training and deployment. + +🤗 Comprehensive support for the open-source ecosystem to democratize physical AI. + +## Quick Start + +LeRobot can be installed directly from PyPI. + +```bash +pip install lerobot +lerobot-info +``` + +> [!IMPORTANT] +> For detailed installation guide, please see the [Installation Documentation](https://huggingface.co/docs/lerobot/installation). + +## Robots & Control + +
+ Reachy 2 Demo +
+ +LeRobot provides a unified `Robot` class interface that decouples control logic from hardware specifics. It supports a wide range of robots and teleoperation devices. + +```python +from lerobot.robots.myrobot import MyRobot + +# Connect to a robot +robot = MyRobot(config=...) +robot.connect() + +# Read observation and send action +obs = robot.get_observation() +action = model.select_action(obs) +robot.send_action(action) +``` + +**Supported Hardware:** SO100, LeKiwi, Koch, HopeJR, OMX, EarthRover, Reachy2, Gamepads, Keyboards, Phones, OpenARM, Unitree G1. + +While these devices are natively integrated into the LeRobot codebase, the library is designed to be extensible. You can easily implement the Robot interface to utilize LeRobot's data collection, training, and visualization tools for your own custom robot. + +For detailed hardware setup guides, see the [Hardware Documentation](https://huggingface.co/docs/lerobot/integrate_hardware). + +## LeRobot Dataset + +To solve the data fragmentation problem in robotics, we utilize the **LeRobotDataset** format. + +- **Structure:** Synchronized MP4 videos (or images) for vision and Parquet files for state/action data. +- **HF Hub Integration:** Explore thousands of robotics datasets on the [Hugging Face Hub](https://huggingface.co/lerobot). +- **Tools:** Seamlessly delete episodes, split by indices/fractions, add/remove features, and merge multiple datasets. 
+ +```python +from lerobot.datasets.lerobot_dataset import LeRobotDataset + +# Load a dataset from the Hub +dataset = LeRobotDataset("lerobot/aloha_mobile_cabinet") + +# Access data (automatically handles video decoding) +episode_index=0 +print(f"{dataset[episode_index]['action'].shape=}\n") +``` + +Learn more about it in the [LeRobotDataset Documentation](https://huggingface.co/docs/lerobot/lerobot-dataset-v3) + +## SoTA Models + +LeRobot implements state-of-the-art policies in pure PyTorch, covering Imitation Learning, Reinforcement Learning, and Vision-Language-Action (VLA) models, with more coming soon. It also provides you with the tools to instrument and inspect your training process. + +

+ Gr00t Architecture +

+ +Training a policy is as simple as running a script configuration: + +```bash +lerobot-train \ + --policy=act \ + --dataset.repo_id=lerobot/aloha_mobile_cabinet +``` + +| Category | Models | +| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Imitation Learning** | [ACT](./docs/source/policy_act_README.md), [Diffusion](./docs/source/policy_diffusion_README.md), [VQ-BeT](./docs/source/policy_vqbet_README.md) | +| **Reinforcement Learning** | [HIL-SERL](./docs/source/hilserl.mdx), [TDMPC](./docs/source/policy_tdmpc_README.md) & QC-FQL (coming soon) | +| **VLAs Models** | [Pi0Fast](./docs/source/pi0fast.mdx), [Pi0.5](./docs/source/pi05.mdx), [GR00T N1.5](./docs/source/policy_groot_README.md), [SmolVLA](./docs/source/policy_smolvla_README.md), [XVLA](./docs/source/xvla.mdx) | + +Similarly to the hardware, you can easily implement your own policy & leverage LeRobot's data collection, training, and visualization tools, and share your model to the HF Hub + +For detailed policy setup guides, see the [Policy Documentation](https://huggingface.co/docs/lerobot/bring_your_own_policies). + +## Inference & Evaluation + +Evaluate your policies in simulation or on real hardware using the unified evaluation script. LeRobot supports standard benchmarks like **LIBERO**, **MetaWorld** and more to come. 
+ +```bash +# Evaluate a policy on the LIBERO benchmark +lerobot-eval \ + --policy.path=lerobot/pi0_libero_finetuned \ + --env.type=libero \ + --env.task=libero_object \ + --eval.n_episodes=10 +``` + +Learn how to implement your own simulation environment or benchmark and distribute it from the HF Hub by following the [EnvHub Documentation](https://huggingface.co/docs/lerobot/envhub) + +## Resources + +- **[Documentation](https://huggingface.co/docs/lerobot/index):** The complete guide to tutorials & API. +- **[Chinese Tutorials: LeRobot+SO-ARM101中文教程-同济子豪兄](https://zihao-ai.feishu.cn/wiki/space/7589642043471924447)** Detailed doc for assembling, teleoperate, dataset, train, deploy. Verified by Seed Studio and 5 global hackathon players. +- **[Discord](https://discord.gg/q8Dzzpym3f):** Join the `LeRobot` server to discuss with the community. +- **[X](https://x.com/LeRobotHF):** Follow us on X to stay up-to-date with the latest developments. +- **[Robot Learning Tutorial](https://huggingface.co/spaces/lerobot/robot-learning-tutorial):** A free, hands-on course to learn robot learning using LeRobot. + +## Citation + +If you use LeRobot in your project, please cite the GitHub repository to acknowledge the ongoing development and contributors: + +```bibtex +@misc{cadene2024lerobot, + author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascal, Caroline and Choghari, Jade and Moss, Jess and Wolf, Thomas}, + title = {LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch}, + howpublished = "\url{https://github.com/huggingface/lerobot}", + year = {2024} +} +``` + +If you are referencing our research or the academic paper, please also cite our ICLR publication: + +
+ICLR 2026 Paper + +```bibtex +@inproceedings{cadenelerobot, + title={LeRobot: An Open-Source Library for End-to-End Robot Learning}, + author={Cadene, Remi and Alibert, Simon and Capuano, Francesco and Aractingi, Michel and Zouitine, Adil and Kooijmans, Pepijn and Choghari, Jade and Russi, Martino and Pascal, Caroline and Palma, Steven and Shukor, Mustafa and Moss, Jess and Soare, Alexander and Aubakirova, Dana and Lhoest, Quentin and Gallou\'edec, Quentin and Wolf, Thomas}, + booktitle={The Fourteenth International Conference on Learning Representations}, + year={2026}, + url={https://arxiv.org/abs/2602.22818} +} +``` + +
+ +## Contribute + +We welcome contributions from everyone in the community! To get started, please read our [CONTRIBUTING.md](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md) guide. Whether you're adding a new feature, improving documentation, or fixing a bug, your help and feedback are invaluable. We're incredibly excited about the future of open-source robotics and can't wait to work with you on what's next—thank you for your support! + +

+ SO101 Video +

+ +
+Built by the LeRobot team at Hugging Face with ❤️ +
diff --git a/lerobot/SECURITY.md b/lerobot/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..cf58f6cdb6bcd220fae063409fba9b4d1a25d8d1 --- /dev/null +++ b/lerobot/SECURITY.md @@ -0,0 +1,48 @@ +# Security Policy + +## Project Status & Philosophy + +`lerobot` has so far been primarily a research and prototyping tool, which is why deployment security hasn’t been a strong focus until now. As `lerobot` continues to be adopted and deployed in production, we are paying much closer attention to these kinds of issues. + +Fortunately, being an open-source project, the community can also help by reporting and fixing vulnerabilities. We appreciate your efforts to responsibly disclose your findings and will make every effort to acknowledge your contributions. + +## Reporting a Vulnerability + +To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/huggingface/lerobot/security/advisories/new) tab. + +The `lerobot` team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance. + +#### Hugging Face Security Team + +Since this project is part of the Hugging Face ecosystem, feel free to submit vulnerability reports directly to: **[security@huggingface.co](mailto:security@huggingface.co)**. Someone from the HF security team will review the report and recommend next steps. + +#### Open Source Disclosures + +If reporting a vulnerability specific to the open-source codebase (and not the underlying Hub infrastructure), you may also use [Huntr](https://huntr.com), a vulnerability disclosure program for open source software. + +## Supported Versions + +Currently, we treat `lerobot` as a rolling release. We prioritize security updates for the latest available version (`main` branch). 
+ +| Version | Supported | +| -------- | --------- | +| Latest | ✅ | +| < Latest | ❌ | + +## Secure Usage Guidelines + +`lerobot` is tightly coupled to the Hugging Face Hub for sharing data and pretrained policies. When downloading artifacts uploaded by others, you expose yourself to risks. Please read below for recommendations to keep your runtime and robot environment safe. + +### Remote Artefacts (Weights & Policies) + +Models and policies uploaded to the Hugging Face Hub come in different formats. We heavily recommend uploading and downloading models in the [`safetensors`](https://github.com/huggingface/safetensors) format. + +`safetensors` was developed specifically to prevent arbitrary code execution on your system, which is critical when running software on physical hardware/robots. + +To avoid loading models from unsafe formats (e.g., `pickle`), you should ensure you are prioritizing `safetensors` files. + +### Remote Code + +Some models or environments on the Hub may require `trust_remote_code=True` to run custom architecture code. + +Please **always** verify the content of the modeling files when using this argument. We recommend setting a specific `revision` (commit hash) when loading remote code to ensure you protect yourself from unverified updates to the repository. diff --git a/lerobot/benchmarks/video/README.md b/lerobot/benchmarks/video/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1feee69c46e66b3d156e8e92b344c9ccd9893dc0 --- /dev/null +++ b/lerobot/benchmarks/video/README.md @@ -0,0 +1,288 @@ +# Video benchmark + +## Questions + +What is the optimal trade-off between: + +- minimizing loading time with random access, +- minimizing memory space on disk, +- maximizing success rate of policies, +- compatibility across devices/platforms for decoding videos (e.g. video players, web browsers). + +How to encode videos? + +- Which video codec (`-vcodec`) to use? h264, h265, AV1? +- What pixel format to use (`-pix_fmt`)?
`yuv444p` or `yuv420p`? +- How much compression (`-crf`)? No compression with `0`, intermediate compression with `25` or extreme with `50+`? +- Which frequency to choose for key frames (`-g`)? A key frame every `10` frames? + +How to decode videos? + +- Which `decoder`? `torchvision`, `torchaudio`, `ffmpegio`, `decord`, or `nvc`? +- What scenarios to use for requesting timestamps during the benchmark? (`timestamps_mode`) + +## Variables + +**Image content & size** +We don't expect the same optimal settings for a dataset of images from a simulation, or from real-world in an apartment, or in a factory, or outdoor, or with lots of moving objects in the scene, etc. Similarly, loading times might not vary linearly with the image size (resolution). +For these reasons, we run this benchmark on four representative datasets: + +- `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera. +- `lerobot/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera. +- `lerobot/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera. +- `lerobot/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera. + +Note: The datasets used for this benchmark need to be image datasets, not video datasets. + +**Data augmentations** +We might revisit this benchmark and find better settings if we train our policies with various data augmentations to make them more robust (e.g. robust to color changes, compression, etc.). + +### Encoding parameters + +| parameter | values | +| ----------- | ------------------------------------------------------------ | +| **vcodec** | `libx264`, `libx265`, `libsvtav1` | +| **pix_fmt** | `yuv444p`, `yuv420p` | +| **g** | `1`, `2`, `3`, `4`, `5`, `6`, `10`, `15`, `20`, `40`, `None` | +| **crf** | `0`, `5`, `10`, `15`, `20`, `25`, `30`, `40`, `50`, `None` | + +Note that `crf` value might be interpreted differently by various video codecs.
In other words, the same value used with one codec doesn't necessarily translate into the same compression level with another codec. In fact, the default value (`None`) isn't the same amongst the different video codecs. Importantly, it is also the case for many other ffmpeg arguments like `g` which specifies the frequency of the key frames. + +For a comprehensive list and documentation of these parameters, see the ffmpeg documentation depending on the video codec used: + +- h264: https://trac.ffmpeg.org/wiki/Encode/H.264 +- h265: https://trac.ffmpeg.org/wiki/Encode/H.265 +- AV1: https://trac.ffmpeg.org/wiki/Encode/AV1 + +### Decoding parameters + +**Decoder** +We tested two video decoding backends from torchvision: + +- `pyav` +- `video_reader` (requires to build torchvision from source) + +**Requested timestamps** +Given the way video decoding works, once a keyframe has been loaded, the decoding of subsequent frames is fast. +This of course is affected by the `-g` parameter during encoding, which specifies the frequency of the keyframes. Given our typical use cases in robotics policies which might request a few timestamps in different random places, we want to replicate these use cases with the following scenarios: + +- `1_frame`: 1 frame, +- `2_frames`: 2 consecutive frames (e.g. `[t, t + 1 / fps]`), +- `6_frames`: 6 consecutive frames (e.g. `[t + i / fps for i in range(6)]`) + +Note that this differs significantly from a typical use case like watching a movie, in which every frame is loaded sequentially from the beginning to the end and it's acceptable to have big values for `-g`. 
+ +Additionally, because some policies might request single timestamps that are a few frames apart, we also have the following scenario: + +- `2_frames_4_space`: 2 frames with 4 consecutive frames of spacing in between (e.g `[t, t + 5 / fps]`), + +However, due to how video decoding is implemented with `pyav`, we don't have access to an accurate seek so in practice this scenario is essentially the same as `6_frames` since all 6 frames between `t` and `t + 5 / fps` will be decoded. + +## Metrics + +**Data compression ratio (lower is better)** +`video_images_size_ratio` is the ratio of the memory space on disk taken by the encoded video over the memory space taken by the original images. For instance, `video_images_size_ratio=25%` means that the video takes 4 times less memory space on disk compared to the original images. + +**Loading time ratio (lower is better)** +`video_images_load_time_ratio` is the ratio of the time it takes to decode frames from the video at a given timestamps over the time it takes to load the exact same original images. Lower is better. For instance, `video_images_load_time_ratio=200%` means that decoding from video is 2 times slower than loading the original images. + +**Average Mean Square Error (lower is better)** +`avg_mse` is the average mean square error between each decoded frame and its corresponding original image over all requested timestamps, and also divided by the number of pixels in the image to be comparable when switching to different image sizes. + +**Average Peak Signal to Noise Ratio (higher is better)** +`avg_psnr` measures the ratio between the maximum possible power of a signal and the power of corrupting noise that affects the fidelity of its representation. Higher PSNR indicates better quality. + +**Average Structural Similarity Index Measure (higher is better)** +`avg_ssim` evaluates the perceived quality of images by comparing luminance, contrast, and structure. 
SSIM values range from -1 to 1, where 1 indicates perfect similarity. + +One aspect that can't be measured here with those metrics is the compatibility of the encoding across platforms, in particular on web browser, for visualization purposes. +h264, h265 and AV1 are all commonly used codecs and should not pose an issue. However, the chroma subsampling (`pix_fmt`) format might affect compatibility: + +- `yuv420p` is more widely supported across various platforms, including web browsers. +- `yuv444p` offers higher color fidelity but might not be supported as broadly. + + + +## How the benchmark works + +The benchmark evaluates both encoding and decoding of video frames on the first episode of each dataset. + +**Encoding:** for each `vcodec` and `pix_fmt` pair, we use a default value for `g` and `crf` upon which we change a single value (either `g` or `crf`) to one of the specified values (we don't test every combination of those as this would be computationally too heavy). +This gives a unique set of encoding parameters which is used to encode the episode. + +**Decoding:** Then, for each of those unique encodings, we iterate through every combination of the decoding parameters `backend` and `timestamps_mode`. For each of them, we record the metrics of a number of samples (given by `--num-samples`). This is parallelized for efficiency and the number of processes can be controlled with `--num-workers`. Ideally, it's best to have a `--num-samples` that is divisible by `--num-workers`. + +Intermediate results saved for each `vcodec` and `pix_fmt` combination in csv tables. +These are then all concatenated to a single table ready for analysis. + +## Caveats + +We tried to measure the most impactful parameters for both encoding and decoding. However, for computational reasons we can't test out every combination. + +Additional encoding parameters exist that are not included in this benchmark. In particular: + +- `-preset` which allows for selecting encoding presets. 
This represents a collection of options that will provide a certain encoding speed to compression ratio. By leaving this parameter unspecified, it is considered to be `medium` for libx264 and libx265 and `8` for libsvtav1. +- `-tune` which allows optimizing the encoding for certain aspects (e.g. film quality, fast decoding, etc.). + +See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters. + +Similarly on the decoding side, other decoders exist but are not implemented in our current benchmark. To name a few: + +- `torchaudio` +- `ffmpegio` +- `decord` +- `nvc` + +Note as well that since we are mostly interested in the performance at decoding time (also because encoding is done only once before uploading a dataset), we did not measure encoding times nor have any metrics regarding encoding. +However, besides the necessity to build ffmpeg from source, encoding did not pose any issue and it didn't take a significant amount of time during this benchmark. + +## Install + +Building ffmpeg from source is required to include libx265 and libaom/libsvtav1 (av1) video codecs ([compilation guide](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu)). + +**Note:** While you still need to build torchvision with a conda-installed `ffmpeg<4.3` to use the `video_reader` decoder (as described in [#220](https://github.com/huggingface/lerobot/pull/220)), you also need another version which is custom-built with all the video codecs for encoding. For the script to then use that version, you can prepend the command above with `PATH="$HOME/bin:$PATH"`, which is where ffmpeg should be built. + +## Adding a video decoder + +Right now, we're only benchmarking the two video decoders available with torchvision: `pyav` and `video_reader`.
+You can easily add a new decoder to benchmark by adding it to this function in the script: + +```diff +def decode_video_frames( + video_path: str, + timestamps: list[float], + tolerance_s: float, + backend: str, +) -> torch.Tensor: + if backend in ["pyav", "video_reader"]: + return decode_video_frames_torchvision( + video_path, timestamps, tolerance_s, backend + ) ++ elif backend == "your_decoder": ++ return your_decoder_function( ++ video_path, timestamps, tolerance_s, backend ++ ) + else: + raise NotImplementedError(backend) +``` + +## Example + +For a quick run, you can try these parameters: + +```bash +python benchmark/video/run_video_benchmark.py \ + --output-dir outputs/video_benchmark \ + --repo-ids \ + lerobot/pusht_image \ + lerobot/aloha_mobile_shrimp_image \ + --vcodec libx264 libx265 \ + --pix-fmt yuv444p yuv420p \ + --g 2 20 None \ + --crf 10 40 None \ + --timestamps-modes 1_frame 2_frames \ + --backends pyav video_reader \ + --num-samples 5 \ + --num-workers 5 \ + --save-frames 0 +``` + +## Results + +### Reproduce + +We ran the benchmark with the following parameters: + +```bash +# h264 and h265 encodings +python benchmark/video/run_video_benchmark.py \ + --output-dir outputs/video_benchmark \ + --repo-ids \ + lerobot/pusht_image \ + lerobot/aloha_mobile_shrimp_image \ + lerobot/paris_street \ + lerobot/kitchen \ + --vcodec libx264 libx265 \ + --pix-fmt yuv444p yuv420p \ + --g 1 2 3 4 5 6 10 15 20 40 None \ + --crf 0 5 10 15 20 25 30 40 50 None \ + --timestamps-modes 1_frame 2_frames 6_frames \ + --backends pyav video_reader \ + --num-samples 50 \ + --num-workers 5 \ + --save-frames 1 + +# av1 encoding (only compatible with yuv420p and pyav decoder) +python benchmark/video/run_video_benchmark.py \ + --output-dir outputs/video_benchmark \ + --repo-ids \ + lerobot/pusht_image \ + lerobot/aloha_mobile_shrimp_image \ + lerobot/paris_street \ + lerobot/kitchen \ + --vcodec libsvtav1 \ + --pix-fmt yuv420p \ + --g 1 2 3 4 5 6 10 15 20 40 None \ + --crf 0
5 10 15 20 25 30 40 50 None \ + --timestamps-modes 1_frame 2_frames 6_frames \ + --backends pyav \ + --num-samples 50 \ + --num-workers 5 \ + --save-frames 1 +``` + +The full results are available [here](https://docs.google.com/spreadsheets/d/1OYJB43Qu8fC26k_OyoMFgGBBKfQRCi4BIuYitQnq3sw/edit?usp=sharing) + +### Parameters selected for LeRobotDataset + +Considering these results, we chose what we think is the best set of encoding parameter: + +- vcodec: `libsvtav1` +- pix-fmt: `yuv420p` +- g: `2` +- crf: `30` + +Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_reader` does not support it (and `pyav` doesn't require a custom build of `torchvision`). + +### Summary + +These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav` + +| video_images_size_ratio | vcodec | pix_fmt | | | | +| --------------------------------- | ---------- | ------- | --------- | --------- | --------- | +| | libx264 | | libx265 | | libsvtav1 | +| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | +| lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% | +| lerobot/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% | +| lerobot/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% | +| lerobot/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% | + +| video_images_load_time_ratio | vcodec | pix_fmt | | | | +| --------------------------------- | ------- | ------- | -------- | ------- | --------- | +| | libx264 | | libx265 | | libsvtav1 | +| repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | +| lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 | +| lerobot/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** | +| lerobot/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** | +| lerobot/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** | + +| | | vcodec | pix_fmt | | | | +| --------------------------------- | -------- | -------- | ------------ 
| -------- | --------- | ------------ | +| | | libx264 | | libx265 | | libsvtav1 | +| repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p | +| lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 | +| | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 | +| | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% | +| lerobot/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** | +| | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** | +| | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** | +| lerobot/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** | +| | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** | +| | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** | +| lerobot/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** | +| | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** | +| | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** | diff --git a/lerobot/benchmarks/video/run_video_benchmark.py b/lerobot/benchmarks/video/run_video_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..064a84b48e4055955ceae0caa948d0eef38fa690 --- /dev/null +++ b/lerobot/benchmarks/video/run_video_benchmark.py @@ -0,0 +1,488 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Assess the performance of video decoding in various configurations. + +This script will benchmark different video encoding and decoding parameters. +See the provided README.md or run `python benchmark/video/run_video_benchmark.py --help` for usage info. +""" + +import argparse +import datetime as dt +import itertools +import random +import shutil +from collections import OrderedDict +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from threading import Lock + +import einops +import numpy as np +import pandas as pd +import PIL +import torch +from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity +from tqdm import tqdm + +from lerobot.datasets.lerobot_dataset import LeRobotDataset +from lerobot.datasets.video_utils import ( + decode_video_frames, + encode_video_frames, +) +from lerobot.utils.constants import OBS_IMAGE +from lerobot.utils.utils import TimerManager + +BASE_ENCODING = OrderedDict( + [ + ("vcodec", "libx264"), + ("pix_fmt", "yuv444p"), + ("g", 2), + ("crf", None), + # TODO(aliberts): Add fastdecode + # ("fastdecode", 0), + ] +) + + +# TODO(rcadene, aliberts): move to `utils.py` folder when we want to refactor +def parse_int_or_none(value) -> int | None: + if value.lower() == "none": + return None + try: + return int(value) + except ValueError as e: + raise argparse.ArgumentTypeError(f"Invalid int or None: {value}") from e + + +def check_datasets_formats(repo_ids: list) -> None: + for repo_id in repo_ids: + dataset = LeRobotDataset(repo_id) + if len(dataset.meta.video_keys) > 0: + raise ValueError( + f"Use only image dataset for running this benchmark. 
Video dataset provided: {repo_id}" + ) + + +def get_directory_size(directory: Path) -> int: + total_size = 0 + for item in directory.rglob("*"): + if item.is_file(): + total_size += item.stat().st_size + return total_size + + +def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> torch.Tensor: + frames = [] + for ts in timestamps: + idx = int(ts * fps) + frame = PIL.Image.open(imgs_dir / f"frame-{idx:06d}.png") + frame = torch.from_numpy(np.array(frame)) + frame = frame.type(torch.float32) / 255 + frame = einops.rearrange(frame, "h w c -> c h w") + frames.append(frame) + return torch.stack(frames) + + +def save_decoded_frames( + imgs_dir: Path, save_dir: Path, frames: torch.Tensor, timestamps: list[float], fps: int +) -> None: + if save_dir.exists() and len(list(save_dir.glob("frame-*.png"))) == len(timestamps): + return + + save_dir.mkdir(parents=True, exist_ok=True) + for i, ts in enumerate(timestamps): + idx = int(ts * fps) + frame_hwc = (frames[i].permute((1, 2, 0)) * 255).type(torch.uint8).cpu().numpy() + PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame-{idx:06d}_decoded.png") + shutil.copyfile(imgs_dir / f"frame-{idx:06d}.png", save_dir / f"frame-{idx:06d}_original.png") + + +def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None: + episode_index = 0 + ep_num_images = dataset.meta.episodes["length"][episode_index] + if imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images: + return + + imgs_dir.mkdir(parents=True, exist_ok=True) + hf_dataset = dataset.hf_dataset.with_format(None) + + # We only save images from the first camera + img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)] + imgs_dataset = hf_dataset.select_columns(img_keys[0]) + + for i, item in enumerate( + tqdm(imgs_dataset, desc=f"saving {dataset.repo_id} first episode images", leave=False) + ): + img = item[img_keys[0]] + img.save(str(imgs_dir / f"frame-{i:06d}.png"), quality=100) + + if i >= 
ep_num_images - 1: + break + + +def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> list[float]: + # Start at 5 to allow for 2_frames_4_space and 6_frames + idx = random.randint(5, ep_num_images - 1) + match timestamps_mode: + case "1_frame": + frame_indexes = [idx] + case "2_frames": + frame_indexes = [idx - 1, idx] + case "2_frames_4_space": + frame_indexes = [idx - 5, idx] + case "6_frames": + frame_indexes = [idx - i for i in range(6)][::-1] + case _: + raise ValueError(timestamps_mode) + + return [idx / fps for idx in frame_indexes] + + +def benchmark_decoding( + imgs_dir: Path, + video_path: Path, + timestamps_mode: str, + backend: str, + ep_num_images: int, + fps: int, + num_samples: int = 50, + num_workers: int = 4, + save_frames: bool = False, +) -> dict: + def process_sample(sample: int, lock: Lock): + time_benchmark = TimerManager(log=False) + timestamps = sample_timestamps(timestamps_mode, ep_num_images, fps) + num_frames = len(timestamps) + result = { + "psnr_values": [], + "ssim_values": [], + "mse_values": [], + } + + with time_benchmark, lock: + frames = decode_video_frames(video_path, timestamps=timestamps, tolerance_s=5e-1, backend=backend) + result["load_time_video_ms"] = (time_benchmark.last * 1000) / num_frames + + with time_benchmark: + original_frames = load_original_frames(imgs_dir, timestamps, fps) + result["load_time_images_ms"] = (time_benchmark.last * 1000) / num_frames + + frames_np, original_frames_np = frames.numpy(), original_frames.numpy() + for i in range(num_frames): + result["mse_values"].append(mean_squared_error(original_frames_np[i], frames_np[i])) + result["psnr_values"].append( + peak_signal_noise_ratio(original_frames_np[i], frames_np[i], data_range=1.0) + ) + result["ssim_values"].append( + structural_similarity(original_frames_np[i], frames_np[i], data_range=1.0, channel_axis=0) + ) + + if save_frames and sample == 0: + save_dir = video_path.with_suffix("") / f"{timestamps_mode}_{backend}" + 
save_decoded_frames(imgs_dir, save_dir, frames, timestamps, fps) + + return result + + load_times_video_ms = [] + load_times_images_ms = [] + mse_values = [] + psnr_values = [] + ssim_values = [] + + # A sample is a single set of decoded frames specified by timestamps_mode (e.g. a single frame, 2 frames, etc.). + # For each sample, we record metrics (loading time and quality metrics) which are then averaged over all samples. + # As these samples are independent, we run them in parallel threads to speed up the benchmark. + # Use a single shared lock for all worker threads + shared_lock = Lock() + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(process_sample, i, shared_lock) for i in range(num_samples)] + for future in tqdm(as_completed(futures), total=num_samples, desc="samples", leave=False): + result = future.result() + load_times_video_ms.append(result["load_time_video_ms"]) + load_times_images_ms.append(result["load_time_images_ms"]) + psnr_values.extend(result["psnr_values"]) + ssim_values.extend(result["ssim_values"]) + mse_values.extend(result["mse_values"]) + + avg_load_time_video_ms = float(np.array(load_times_video_ms).mean()) + avg_load_time_images_ms = float(np.array(load_times_images_ms).mean()) + video_images_load_time_ratio = avg_load_time_video_ms / avg_load_time_images_ms + + return { + "avg_load_time_video_ms": avg_load_time_video_ms, + "avg_load_time_images_ms": avg_load_time_images_ms, + "video_images_load_time_ratio": video_images_load_time_ratio, + "avg_mse": float(np.mean(mse_values)), + "avg_psnr": float(np.mean(psnr_values)), + "avg_ssim": float(np.mean(ssim_values)), + } + + +def benchmark_encoding_decoding( + dataset: LeRobotDataset, + video_path: Path, + imgs_dir: Path, + encoding_cfg: dict, + decoding_cfg: dict, + num_samples: int, + num_workers: int, + save_frames: bool, + overwrite: bool = False, + seed: int = 1337, +) -> list[dict]: + fps = dataset.fps + + if overwrite or not 
video_path.is_file(): + tqdm.write(f"encoding {video_path}") + encode_video_frames( + imgs_dir=imgs_dir, + video_path=video_path, + fps=fps, + vcodec=encoding_cfg["vcodec"], + pix_fmt=encoding_cfg["pix_fmt"], + g=encoding_cfg.get("g"), + crf=encoding_cfg.get("crf"), + # fast_decode=encoding_cfg.get("fastdecode"), + overwrite=True, + ) + + episode_index = 0 + ep_num_images = dataset.meta.episodes["length"][episode_index] + width, height = tuple(dataset[0][dataset.meta.camera_keys[0]].shape[-2:]) + num_pixels = width * height + video_size_bytes = video_path.stat().st_size + images_size_bytes = get_directory_size(imgs_dir) + video_images_size_ratio = video_size_bytes / images_size_bytes + + random.seed(seed) + benchmark_table = [] + for timestamps_mode in tqdm( + decoding_cfg["timestamps_modes"], desc="decodings (timestamps_modes)", leave=False + ): + for backend in tqdm(decoding_cfg["backends"], desc="decodings (backends)", leave=False): + benchmark_row = benchmark_decoding( + imgs_dir, + video_path, + timestamps_mode, + backend, + ep_num_images, + fps, + num_samples, + num_workers, + save_frames, + ) + benchmark_row.update( + **{ + "repo_id": dataset.repo_id, + "resolution": f"{width} x {height}", + "num_pixels": num_pixels, + "video_size_bytes": video_size_bytes, + "images_size_bytes": images_size_bytes, + "video_images_size_ratio": video_images_size_ratio, + "timestamps_mode": timestamps_mode, + "backend": backend, + }, + **encoding_cfg, + ) + benchmark_table.append(benchmark_row) + + return benchmark_table + + +def main( + output_dir: Path, + repo_ids: list[str], + vcodec: list[str], + pix_fmt: list[str], + g: list[int], + crf: list[int], + # fastdecode: list[int], + timestamps_modes: list[str], + backends: list[str], + num_samples: int, + num_workers: int, + save_frames: bool, +): + check_datasets_formats(repo_ids) + encoding_benchmarks = { + "g": g, + "crf": crf, + # "fastdecode": fastdecode, + } + decoding_benchmarks = { + "timestamps_modes": timestamps_modes, 
+ "backends": backends, + } + headers = ["repo_id", "resolution", "num_pixels"] + headers += list(BASE_ENCODING.keys()) + headers += [ + "timestamps_mode", + "backend", + "video_size_bytes", + "images_size_bytes", + "video_images_size_ratio", + "avg_load_time_video_ms", + "avg_load_time_images_ms", + "video_images_load_time_ratio", + "avg_mse", + "avg_psnr", + "avg_ssim", + ] + file_paths = [] + for video_codec in tqdm(vcodec, desc="encodings (vcodec)"): + for pixel_format in tqdm(pix_fmt, desc="encodings (pix_fmt)", leave=False): + benchmark_table = [] + for repo_id in tqdm(repo_ids, desc="encodings (datasets)", leave=False): + dataset = LeRobotDataset(repo_id) + imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_") + # We only use the first episode + save_first_episode(imgs_dir, dataset) + for duet in [ + dict(zip(encoding_benchmarks.keys(), unique_combination, strict=False)) + for unique_combination in itertools.product(*encoding_benchmarks.values()) + ]: + encoding_cfg = BASE_ENCODING.copy() + encoding_cfg["vcodec"] = video_codec + encoding_cfg["pix_fmt"] = pixel_format + for key, value in duet.items(): + encoding_cfg[key] = value + args_path = Path("_".join(str(value) for value in encoding_cfg.values())) + video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4" + benchmark_table += benchmark_encoding_decoding( + dataset, + video_path, + imgs_dir, + encoding_cfg, + decoding_benchmarks, + num_samples, + num_workers, + save_frames, + ) + + # Save intermediate results + benchmark_df = pd.DataFrame(benchmark_table, columns=headers) + now = dt.datetime.now() + csv_path = ( + output_dir + / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{video_codec}_{pixel_format}_{num_samples}-samples.csv" + ) + benchmark_df.to_csv(csv_path, header=True, index=False) + file_paths.append(csv_path) + del benchmark_df + + # Concatenate all results + df_list = [pd.read_csv(csv_path) for csv_path in file_paths] + concatenated_df = pd.concat(df_list, 
ignore_index=True) + concatenated_path = output_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_all_{num_samples}-samples.csv" + concatenated_df.to_csv(concatenated_path, header=True, index=False) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--output-dir", + type=Path, + default=Path("outputs/video_benchmark"), + help="Directory where the video benchmark outputs are written.", + ) + parser.add_argument( + "--repo-ids", + type=str, + nargs="*", + default=[ + "lerobot/pusht_image", + "lerobot/aloha_mobile_shrimp_image", + "lerobot/paris_street", + "lerobot/kitchen", + ], + help="Datasets repo-ids to test against. First episodes only are used. Must be images.", + ) + parser.add_argument( + "--vcodec", + type=str, + nargs="*", + default=["h264", "hevc", "libsvtav1"], + help="Video codecs to be tested", + ) + parser.add_argument( + "--pix-fmt", + type=str, + nargs="*", + default=["yuv444p", "yuv420p"], + help="Pixel formats (chroma subsampling) to be tested", + ) + parser.add_argument( + "--g", + type=parse_int_or_none, + nargs="*", + default=[1, 2, 3, 4, 5, 6, 10, 15, 20, 40, 100, None], + help="Group of pictures sizes to be tested.", + ) + parser.add_argument( + "--crf", + type=parse_int_or_none, + nargs="*", + default=[0, 5, 10, 15, 20, 25, 30, 40, 50, None], + help="Constant rate factors to be tested.", + ) + # parser.add_argument( + # "--fastdecode", + # type=int, + # nargs="*", + # default=[0, 1], + # help="Use the fastdecode tuning option. 0 disables it. " + # "For libx264 and libx265/hevc, only 1 is possible. 
" + # "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization", + # ) + parser.add_argument( + "--timestamps-modes", + type=str, + nargs="*", + default=[ + "1_frame", + "2_frames", + "2_frames_4_space", + "6_frames", + ], + help="Timestamps scenarios to be tested.", + ) + parser.add_argument( + "--backends", + type=str, + nargs="*", + default=["torchcodec", "pyav"], + help="Torchvision decoding backend to be tested.", + ) + parser.add_argument( + "--num-samples", + type=int, + default=50, + help="Number of samples for each encoding x decoding config.", + ) + parser.add_argument( + "--num-workers", + type=int, + default=10, + help="Number of processes for parallelized sample processing.", + ) + parser.add_argument( + "--save-frames", + type=int, + default=0, + help="Whether to save decoded frames or not. Enter a non-zero number for true.", + ) + args = parser.parse_args() + main(**vars(args)) diff --git a/lerobot/docker/Dockerfile.internal b/lerobot/docker/Dockerfile.internal new file mode 100644 index 0000000000000000000000000000000000000000..b385fc51c8d3e3e81181709dfd3b053260c2fd31 --- /dev/null +++ b/lerobot/docker/Dockerfile.internal @@ -0,0 +1,95 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This Dockerfile is designed for HuggingFace internal CI environments +# that require GPU access. It starts from an NVIDIA CUDA base image. 
+ +# docker build -f docker/Dockerfile.internal -t lerobot-internal . + +# Configure the base image for CI with GPU access +# TODO(Steven): Bump these versions +ARG CUDA_VERSION=12.4.1 +ARG OS_VERSION=22.04 +FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${OS_VERSION} + +# Define Python version argument +ARG PYTHON_VERSION=3.12 + +# Configure environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + MUJOCO_GL=egl \ + PATH=/lerobot/.venv/bin:$PATH \ + CUDA_VISIBLE_DEVICES=0 \ + TEST_TYPE=single_gpu \ + DEVICE=cuda + +# Install Python, system dependencies, and uv (as root) +RUN apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common build-essential git curl \ + libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ + libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \ + cmake pkg-config ninja-build \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-venv \ + python${PYTHON_VERSION}-dev \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv \ + && useradd --create-home --shell /bin/bash user_lerobot \ + && usermod -aG sudo user_lerobot \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Create application directory and set permissions +WORKDIR /lerobot +RUN chown -R user_lerobot:user_lerobot /lerobot + +# Switch to the non-root user +USER user_lerobot + +# Environment variables for the testing +ENV HOME=/home/user_lerobot \ + HF_HOME=/home/user_lerobot/.cache/huggingface \ + HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \ + TORCH_HOME=/home/user_lerobot/.cache/torch \ + TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton + +# Create the virtual environment +# We use a virtual environment inside the container—even though the container itself \ +# provides isolation—to ensure compatibility with the cluster and to prevent \ +# issues with 
MuJoCo and OpenGL drivers. +RUN uv venv --python python${PYTHON_VERSION} + +# Install Python dependencies for caching +COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml README.md MANIFEST.in ./ +COPY --chown=user_lerobot:user_lerobot src/ src/ + +ARG UNBOUND_DEPS=false + +RUN if [ "$UNBOUND_DEPS" = "true" ]; then \ + sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml; \ + echo "Dependencies unbound:" && cat pyproject.toml; \ + fi + +RUN uv pip install --no-cache ".[all]" + +RUN chmod +x /lerobot/.venv/lib/python${PYTHON_VERSION}/site-packages/triton/backends/nvidia/bin/ptxas + +# Copy the rest of the application source code +# Make sure to have the git-LFS files for testing +COPY --chown=user_lerobot:user_lerobot . . + +# Set the default command +CMD ["/bin/bash"] diff --git a/lerobot/docker/Dockerfile.user b/lerobot/docker/Dockerfile.user new file mode 100644 index 0000000000000000000000000000000000000000..f267be7f210a73f6e04365085702ba1f5e54f8a9 --- /dev/null +++ b/lerobot/docker/Dockerfile.user @@ -0,0 +1,81 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This Dockerfile is designed for a lerobot user who wants to +# experiment with the project. It starts from an Python Slim base image. + +# docker build -f docker/Dockerfile.user -t lerobot-user . 
+# docker run -it --rm lerobot-user + +# With USB physical access : docker run -it --device=/dev/ -v /dev/:/dev/ --rm lerobot-user + +# Configure the base image +ARG PYTHON_VERSION=3.12 +FROM python:${PYTHON_VERSION}-slim + +# Configure environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + MUJOCO_GL=egl \ + PATH=/lerobot/.venv/bin:$PATH + +# Install system dependencies and uv (as root) +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential git curl libglib2.0-0 libegl1-mesa-dev ffmpeg \ + libusb-1.0-0-dev speech-dispatcher libgeos-dev portaudio19-dev \ + cmake pkg-config ninja-build \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && mv /root/.local/bin/uv /usr/local/bin/uv \ + && useradd --create-home --shell /bin/bash user_lerobot \ + && usermod -aG sudo user_lerobot \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Create application directory and set permissions +WORKDIR /lerobot +RUN chown -R user_lerobot:user_lerobot /lerobot + +# Switch to the non-root user +USER user_lerobot + +# Environment variables for the testing +ENV HOME=/home/user_lerobot \ + HF_HOME=/home/user_lerobot/.cache/huggingface \ + HF_LEROBOT_HOME=/home/user_lerobot/.cache/huggingface/lerobot \ + TORCH_HOME=/home/user_lerobot/.cache/torch \ + TRITON_CACHE_DIR=/home/user_lerobot/.cache/triton + +# Create the virtual environment +# We use a virtual environment inside the container—even though the container itself \ +# provides isolation—to closely resemble local development and allow users to \ +# run other Python projects in the same container without dependency conflicts. 
+RUN uv venv + +# Install Python dependencies for caching +COPY --chown=user_lerobot:user_lerobot setup.py pyproject.toml README.md MANIFEST.in ./ +COPY --chown=user_lerobot:user_lerobot src/ src/ + +ARG UNBOUND_DEPS=false + +RUN if [ "$UNBOUND_DEPS" = "true" ]; then \ + sed -i 's/,[[:space:]]*<[0-9\.]*//g' pyproject.toml; \ + echo "Dependencies unbound:" && cat pyproject.toml; \ + fi + +RUN uv pip install --no-cache ".[all]" + +# Copy the rest of the application code +# Make sure to have the git-LFS files for testing +COPY --chown=user_lerobot:user_lerobot . . + +# Set the default command +CMD ["/bin/bash"] diff --git a/lerobot/docs-requirements.txt b/lerobot/docs-requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e286ad2bb46d14ba2b77d5a3e364669406089965 --- /dev/null +++ b/lerobot/docs-requirements.txt @@ -0,0 +1,3 @@ +# docs-requirements.txt +hf-doc-builder @ git+https://github.com/huggingface/doc-builder.git@main +watchdog>=6.0.0 diff --git a/lerobot/docs/README.md b/lerobot/docs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..476eb8dce871b56577ff92a86fb8c699699f51e1 --- /dev/null +++ b/lerobot/docs/README.md @@ -0,0 +1,139 @@ + + +# Generating the documentation + +To generate the documentation, you first have to build it. Several packages are necessary to build the doc, +you can install them with the following command, at the root of the code repository: + +```bash +pip install -e . -r docs-requirements.txt +``` + +You will also need `nodejs`. Please refer to their [installation page](https://nodejs.org/en/download) + +--- + +**NOTE** + +You only need to generate the documentation to inspect it locally (if you're planning changes and want to +check how they look before committing for instance). You don't have to `git commit` the built documentation. 
+ +--- + +## Building the documentation + +Once you have setup the `doc-builder` and additional packages, you can generate the documentation by +typing the following command: + +```bash +doc-builder build lerobot docs/source/ --build_dir ~/tmp/test-build +``` + +You can adapt the `--build_dir` to set any temporary folder that you prefer. This command will create it and generate +the MDX files that will be rendered as the documentation on the main website. You can inspect them in your favorite +Markdown editor. + +## Previewing the documentation + +To preview the docs, first install the `watchdog` module with: + +```bash +pip install watchdog +``` + +Then run the following command: + +```bash +doc-builder preview lerobot docs/source/ +``` + +The docs will be viewable at [http://localhost:3000](http://localhost:3000). You can also preview the docs once you have opened a PR. You will see a bot add a comment to a link where the documentation with your changes lives. + +--- + +**NOTE** + +The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again). + +--- + +## Adding a new element to the navigation bar + +Accepted files are Markdown (.md). + +Create a file with its extension and put it in the source directory. You can then link it to the toc-tree by putting +the filename without the extension in the [`_toctree.yml`](https://github.com/huggingface/lerobot/blob/main/docs/source/_toctree.yml) file. + +## Renaming section headers and moving sections + +It helps to keep the old links working when renaming the section header and/or moving sections from one document to another. This is because the old links are likely to be used in Issues, Forums, and Social media and it'd make for a much more superior user experience if users reading those months later could still easily navigate to the originally intended information. 
+ +Therefore, we simply keep a little map of moved sections at the end of the document where the original section was. The key is to preserve the original anchor. + +So if you renamed a section from: "Section A" to "Section B", then you can add at the end of the file: + +``` +Sections that were moved: + +[ Section A ] +``` + +and of course, if you moved it to another file, then: + +``` +Sections that were moved: + +[ Section A ] +``` + +Use the relative style to link to the new file so that the versioned docs continue to work. + +For an example of a rich moved sections set please see the very end of [the transformers Trainer doc](https://github.com/huggingface/transformers/blob/main/docs/source/en/main_classes/trainer.md). + +### Adding a new tutorial + +Adding a new tutorial or section is done in two steps: + +- Add a new file under `./source`. This file can either be ReStructuredText (.rst) or Markdown (.md). +- Link that file in `./source/_toctree.yml` on the correct toc-tree. + +Make sure to put your new file under the proper section. If you have a doubt, feel free to ask in a Github Issue or PR. + +### Writing source documentation + +Values that should be put in `code` should either be surrounded by backticks: \`like so\`. Note that argument names +and objects like True, None or any strings should usually be put in `code`. + +#### Writing a multi-line code block + +Multi-line code blocks can be useful for displaying examples. They are done between two lines of three backticks as usual in Markdown: + +```` +``` +# first line of code +# second line +# etc +``` +```` + +#### Adding an image + +Due to the rapidly growing repository, it is important to make sure that no files that would significantly weigh down the repository are added. This includes images, videos, and other non-text files. 
We prefer to leverage a hf.co hosted `dataset` like +the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-testing) in which to place these files and reference +them by URL. We recommend putting them in the following dataset: [huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images). +If an external contribution, feel free to add the images to your PR and ask a Hugging Face member to migrate your images +to this dataset. diff --git a/lerobot/docs/source/_toctree.yml b/lerobot/docs/source/_toctree.yml new file mode 100644 index 0000000000000000000000000000000000000000..09d94d28cca95dc4007e219bca77e7f7d1acfa60 --- /dev/null +++ b/lerobot/docs/source/_toctree.yml @@ -0,0 +1,136 @@ +- sections: + - local: index + title: LeRobot + - local: installation + title: Installation + title: Get started +- sections: + - local: il_robots + title: Imitation Learning for Robots + - local: bring_your_own_policies + title: Bring Your Own Policies + - local: integrate_hardware + title: Bring Your Own Hardware + - local: hilserl + title: Train a Robot with RL + - local: hilserl_sim + title: Train RL in Simulation + - local: multi_gpu_training + title: Multi GPU training + - local: peft_training + title: Training with PEFT (e.g., LoRA) + - local: rename_map + title: Using Rename Map and Empty Cameras + title: "Tutorials" +- sections: + - local: lerobot-dataset-v3 + title: Using LeRobotDataset + - local: porting_datasets_v3 + title: Porting Large Datasets + - local: using_dataset_tools + title: Using the Dataset Tools + - local: dataset_subtask + title: Using Subtasks in the Dataset + - local: streaming_video_encoding + title: Streaming Video Encoding + title: "Datasets" +- sections: + - local: act + title: ACT + - local: smolvla + title: SmolVLA + - local: pi0 + title: π₀ (Pi0) + - local: pi0fast + title: π₀-FAST (Pi0Fast) + - local: pi05 + title: π₀.₅ (Pi05) + - local: groot + title: NVIDIA GR00T N1.5 + - local: xvla + 
title: X-VLA + - local: walloss + title: WALL-OSS + title: "Policies" +- sections: + - local: sarm + title: SARM + title: "Reward Models" +- sections: + - local: async + title: Use Async Inference + - local: rtc + title: Real-Time Chunking (RTC) + title: "Inference" +- sections: + - local: envhub + title: Environments from the Hub + - local: envhub_leisaac + title: Control & Train Robots in Sim (LeIsaac) + - local: envhub_isaaclab_arena + title: NVIDIA IsaacLab Arena Environments + - local: libero + title: Using Libero + - local: metaworld + title: Using MetaWorld + title: "Simulation" +- sections: + - local: introduction_processors + title: Introduction to Robot Processors + - local: debug_processor_pipeline + title: Debug your processor pipeline + - local: implement_your_own_processor + title: Implement your own processor + - local: processors_robots_teleop + title: Processors for Robots and Teleoperators + - local: env_processor + title: Environment Processors + title: "Robot Processors" +- sections: + - local: so101 + title: SO-101 + - local: so100 + title: SO-100 + - local: koch + title: Koch v1.1 + - local: lekiwi + title: LeKiwi + - local: hope_jr + title: Hope Jr + - local: reachy2 + title: Reachy 2 + - local: unitree_g1 + title: Unitree G1 + - local: earthrover_mini_plus + title: Earth Rover Mini + - local: omx + title: OMX + - local: openarm + title: OpenArm + title: "Robots" +- sections: + - local: phone_teleop + title: Phone + title: "Teleoperators" +- sections: + - local: cameras + title: Cameras + title: "Sensors" +- sections: + - local: torch_accelerators + title: PyTorch accelerators + title: "Supported Hardware" +- sections: + - local: notebooks + title: Notebooks + - local: feetech + title: Updating Feetech Firmware + - local: damiao + title: Damiao Motors and CAN Bus + title: "Resources" +- sections: + - local: contributing + title: Contribute to LeRobot + - local: backwardcomp + title: Backward compatibility + title: "About" diff --git 
a/lerobot/docs/source/act.mdx b/lerobot/docs/source/act.mdx new file mode 100644 index 0000000000000000000000000000000000000000..453bcbba89aabf6c6c0e195f3f2599cc56e27bb7 --- /dev/null +++ b/lerobot/docs/source/act.mdx @@ -0,0 +1,95 @@ +# ACT (Action Chunking with Transformers) + +ACT is a **lightweight and efficient policy for imitation learning**, especially well-suited for fine-grained manipulation tasks. It's the **first model we recommend when you're starting out** with LeRobot due to its fast training time, low computational requirements, and strong performance. + +
+ +
+ +_Watch this tutorial from the LeRobot team to learn how ACT works: [LeRobot ACT Tutorial](https://www.youtube.com/watch?v=ft73x0LfGpM)_ + +## Model Overview + +Action Chunking with Transformers (ACT) was introduced in the paper [Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware](https://arxiv.org/abs/2304.13705) by Zhao et al. The policy was designed to enable precise, contact-rich manipulation tasks using affordable hardware and minimal demonstration data. + +### Why ACT is Great for Beginners + +ACT stands out as an excellent starting point for several reasons: + +- **Fast Training**: Trains in a few hours on a single GPU +- **Lightweight**: Only ~80M parameters, making it efficient and easy to work with +- **Data Efficient**: Often achieves high success rates with just 50 demonstrations + +### Architecture + +ACT uses a transformer-based architecture with three main components: + +1. **Vision Backbone**: ResNet-18 processes images from multiple camera viewpoints +2. **Transformer Encoder**: Synthesizes information from camera features, joint positions, and a learned latent variable +3. **Transformer Decoder**: Generates coherent action sequences using cross-attention + +The policy takes as input: + +- Multiple RGB images (e.g., from wrist cameras, front/top cameras) +- Current robot joint positions +- A latent style variable `z` (learned during training, set to zero during inference) + +And outputs a chunk of `k` future action sequences. + +## Installation Requirements + +1. Install LeRobot by following our [Installation Guide](./installation). +2. ACT is included in the base LeRobot installation, so no additional dependencies are needed! + +## Training ACT + +ACT works seamlessly with the standard LeRobot training pipeline. 
Here's a complete example for training ACT on your dataset: + +```bash +lerobot-train \ + --dataset.repo_id=${HF_USER}/your_dataset \ + --policy.type=act \ + --output_dir=outputs/train/act_your_dataset \ + --job_name=act_your_dataset \ + --policy.device=cuda \ + --wandb.enable=true \ + --policy.repo_id=${HF_USER}/act_policy +``` + +### Training Tips + +1. **Start with defaults**: ACT's default hyperparameters work well for most tasks +2. **Training duration**: Expect a few hours for 100k training steps on a single GPU +3. **Batch size**: Start with batch size 8 and adjust based on your GPU memory + +### Train using Google Colab + +If your local computer doesn't have a powerful GPU, you can utilize Google Colab to train your model by following the [ACT training notebook](./notebooks#training-act). + +## Evaluating ACT + +Once training is complete, you can evaluate your ACT policy using the `lerobot-record` command with your trained policy. This will run inference and record evaluation episodes: + +```bash +lerobot-record \ + --robot.type=so100_follower \ + --robot.port=/dev/ttyACM0 \ + --robot.id=my_robot \ + --robot.cameras="{ front: {type: opencv, index_or_path: 0, width: 640, height: 480, fps: 30}}" \ + --display_data=true \ + --dataset.repo_id=${HF_USER}/eval_act_your_dataset \ + --dataset.num_episodes=10 \ + --dataset.single_task="Your task description" \ + --dataset.streaming_encoding=true \ + --dataset.encoder_threads=2 \ + # --dataset.vcodec=auto \ + --policy.path=${HF_USER}/act_policy +``` diff --git a/lerobot/docs/source/async.mdx b/lerobot/docs/source/async.mdx new file mode 100644 index 0000000000000000000000000000000000000000..a46408a0d05ee817be678032d2f893b2fff61400 --- /dev/null +++ b/lerobot/docs/source/async.mdx @@ -0,0 +1,313 @@ +# Asynchronous Inference + +With our [SmolVLA](https://huggingface.co/papers/2506.01844) we introduced a new way to run inference on real-world robots, **decoupling action prediction from action execution**. 
+In this tutorial, we'll show how to use asynchronous inference (_async inference_) using a finetuned version of SmolVLA, and all the policies supported by LeRobot. +**Try async inference with all the policies** supported by LeRobot! + +**What you'll learn:** + +1. Why asynchronous inference matters and how it compares to more traditional, sequential inference. +2. How to spin up a `PolicyServer` and connect a `RobotClient` from the same machine, and even over the network. +3. How to tune key parameters (`actions_per_chunk`, `chunk_size_threshold`) for your robot and policy. + +If you get stuck, hop into our [Discord community](https://discord.gg/s3KuuzsPFb)! + +In a nutshell: with _async inference_, your robot keeps acting while the policy server is already busy computing the next chunk of actions---eliminating "wait-for-inference" lags and unlocking smoother, more reactive behaviours. +This is fundamentally different from synchronous inference (sync), where the robot stays idle while the policy computes the next chunk of actions. + +--- + +## Getting started with async inference + +You can read more information on asynchronous inference in our [blogpost](https://huggingface.co/blog/async-robot-inference). This guide is designed to help you quickly set up and run asynchronous inference in your environment. + +First, install `lerobot` with the `async` tag, to install the extra dependencies required to run async inference. + +```shell +pip install -e ".[async]" +``` + +Then, spin up a policy server (in one terminal, or in a separate machine) specifying the host address and port for the client to connect to. +You can spin up a policy server running: + +```shell +python -m lerobot.async_inference.policy_server \ + --host=127.0.0.1 \ + --port=8080 +``` + +This will start a policy server listening on `127.0.0.1:8080` (`localhost`, port 8080). 
At this stage, the policy server is empty, as all information related to which policy to run and with which parameters are specified during the first handshake with the client. Spin up a client with: + +```shell +python -m lerobot.async_inference.robot_client \ + --server_address=127.0.0.1:8080 \ # SERVER: the host address and port of the policy server + --robot.type=so100_follower \ # ROBOT: your robot type + --robot.port=/dev/tty.usbmodem585A0076841 \ # ROBOT: your robot port + --robot.id=follower_so100 \ # ROBOT: your robot id, to load calibration file + --robot.cameras="{ laptop: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}, phone: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \ # POLICY: the cameras used to acquire frames, with keys matching the keys expected by the policy + --task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act` + --policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc) + --pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base) + --policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu) + --actions_per_chunk=50 \ # POLICY: the number of actions to output at once + --chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server + --aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions + --debug_visualize_queue_size=True # CLIENT: whether to visualize the queue size at runtime +``` + +In summary, you need to specify instructions for: + +- `SERVER`: the address and port of the policy server +- `ROBOT`: the type of robot to connect to, the port to connect to, and the local `id` of the robot +- `POLICY`: the type of policy to run, and the model name/path on server to the 
checkpoint to run. You also need to specify which device should the server be using, and how many actions to output at once (capped at the policy max actions value). +- `CLIENT`: the threshold for the chunk size before sending a new observation to the server, and the function to aggregate actions on overlapping portions. Optionally, you can also visualize the queue size at runtime, to help you tune the `CLIENT` parameters. + +Importantly, + +- `actions_per_chunk` and `chunk_size_threshold` are key parameters to tune for your setup. +- `aggregate_fn_name` is the function to aggregate actions on overlapping portions. You can either add a new one to a registry of functions, or add your own in `robot_client.py` (see [here](NOTE:addlinktoLOC)) +- `debug_visualize_queue_size` is a useful tool to tune the `CLIENT` parameters. + +## Done! You should see your robot moving around by now 😉 + +## Async vs. synchronous inference + +Synchronous inference relies on interleaving action chunk prediction and action execution. This inherently results in _idle frames_, frames where the robot idly awaits the policy's output: a new action chunk. +In turn, inference is plagued by evident real-time lags, where the robot simply stops acting due to the lack of available actions. +With robotics models increasing in size, this problem risks becoming only more severe. + +

+ +

+

+ Synchronous inference makes the robot idle while the policy is + computing the next chunk of actions. +

+ +To overcome this, we design async inference, a paradigm where action planning and execution are decoupled, resulting in (1) higher adaptability and, most importantly, (2) no idle frames. +Crucially, with async inference, the next action chunk is computed _before_ the current one is exhausted, resulting in no idleness. +Higher adaptability is ensured by aggregating the different action chunks on overlapping portions, obtaining an up-to-date plan and a tighter control loop. + +

+ +

+

+ Asynchronous inference results in no idleness because the next chunk is + computed before the current chunk is exhausted. +

+ +--- + +## Start the Policy Server + +Policy servers are wrappers around a `PreTrainedPolicy` interfacing them with observations coming from a robot client. +Policy servers are initialized as empty containers which are populated with the requested policy specified in the initial handshake between the robot client and the policy server. +As such, spinning up a policy server is as easy as specifying the host address and port. If you're running the policy server on the same machine as the robot client, you can use `localhost` as the host address. + + + +```bash +python -m lerobot.async_inference.policy_server \ + --host=127.0.0.1 \ + --port=8080 +``` + + + + +```python +from lerobot.async_inference.configs import PolicyServerConfig +from lerobot.async_inference.policy_server import serve + +config = PolicyServerConfig( + host="localhost", + port=8080, +) +serve(config) +``` + + + + + +This listens on `localhost:8080` for an incoming connection from the associated`RobotClient`, which will communicate which policy to run during the first client-server handshake. + +--- + +## Launch the Robot Client + +`RobotClient` is a wrapper around a `Robot` instance, which `RobotClient` connects to the (possibly remote) `PolicyServer`. +The `RobotClient` streams observations to the `PolicyServer`, and receives action chunks obtained running inference on the server (which we assume to have better computational resources than the robot controller). 
+ + + +```bash +python -m lerobot.async_inference.robot_client \ + --server_address=127.0.0.1:8080 \ # SERVER: the host address and port of the policy server + --robot.type=so100_follower \ # ROBOT: your robot type + --robot.port=/dev/tty.usbmodem585A0076841 \ # ROBOT: your robot port + --robot.id=follower_so100 \ # ROBOT: your robot id, to load calibration file + --robot.cameras="{ laptop: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}, phone: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}" \ # POLICY: the cameras used to acquire frames, with keys matching the keys expected by the policy + --task="dummy" \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act` + --policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc) + --pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base) + --policy_device=mps \ # POLICY: the device to run the policy on, on the server + --actions_per_chunk=50 \ # POLICY: the number of actions to output at once + --chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server + --aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions + --debug_visualize_queue_size=True # CLIENT: whether to visualize the queue size at runtime +``` + + + + +```python +import threading +from lerobot.robots.so_follower import SO100FollowerConfig +from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.async_inference.configs import RobotClientConfig +from lerobot.async_inference.robot_client import RobotClient +from lerobot.async_inference.helpers import visualize_action_queue_size + +# 1. 
Create the robot instance +"""Check out the cameras available in your setup by running `python lerobot/find_cameras.py`""" +# these cameras must match the ones expected by the policy +# check the config.json on the Hub for the policy you are using +camera_cfg = { + "top": OpenCVCameraConfig(index_or_path=0, width=640, height=480, fps=30), + "side": OpenCVCameraConfig(index_or_path=1, width=640, height=480, fps=30) +} + +robot_cfg = SO100FollowerConfig( + port="/dev/tty.usbmodem585A0076841", + id="follower_so100", + cameras=camera_cfg +) + +# 3. Create client configuration +client_cfg = RobotClientConfig( + robot=robot_cfg, + server_address="localhost:8080", + policy_device="mps", + client_device="cpu", + policy_type="smolvla", + pretrained_name_or_path="/smolvla_async", + chunk_size_threshold=0.5, + actions_per_chunk=50, # make sure this is less than the max actions of the policy +) + +# 4. Create and start client +client = RobotClient(client_cfg) + +# 5. Specify the task +task = "Don't do anything, stay still" + +if client.start(): + # Start action receiver thread + action_receiver_thread = threading.Thread(target=client.receive_actions, daemon=True) + action_receiver_thread.start() + + try: + # Run the control loop + client.control_loop(task) + except KeyboardInterrupt: + client.stop() + action_receiver_thread.join() + # (Optionally) plot the action queue size + visualize_action_queue_size(client.action_queue_size) +``` + + + + + +The following two parameters are key in every setup: + + + + + + + + + + + + + + + + + + + + + +
HyperparameterDefaultWhat it does
+ actions_per_chunk + 50 + How many actions the policy outputs at once. Typical values: 10-50. +
+ chunk_size_threshold + 0.7 + When the queue is ≤ 50% full, the client sends a fresh observation. + Value in [0, 1]. +
+ + + Different values of `actions_per_chunk` and `chunk_size_threshold` do result + in different behaviours. + + +On the one hand, increasing the value of `actions_per_chunk` will result in reducing the likelihood of ending up with no actions to execute, as more actions will be available when the new chunk is computed. +However, larger values of `actions_per_chunk` might also result in less precise actions, due to the compounding errors consequent to predicting actions over longer timespans. + +On the other hand, increasing the value of `chunk_size_threshold` will result in sending out to the `PolicyServer` observations for inference more often, resulting in a larger number of updates action chunks, overlapping on significant portions. This results in high adaptability, in the limit predicting one action chunk for each observation, which is in turn only marginally consumed while a new one is produced. +This option does also put more pressure on the inference pipeline, as a consequence of the many requests. Conversely, values of `chunk_size_threshold` close to 0.0 collapse to the synchronous edge case, whereby new observations are only sent out whenever the current chunk is exhausted. + +We found the default values of `actions_per_chunk` and `chunk_size_threshold` to work well in the experiments we developed for the [SmolVLA paper](https://huggingface.co/papers/2506.01844), but recommend experimenting with different values to find the best fit for your setup. + +### Tuning async inference for your setup + +1. **Choose your computational resources carefully.** [PI0](https://huggingface.co/lerobot/pi0) occupies 14GB of memory at inference time, while [SmolVLA](https://huggingface.co/lerobot/smolvla_base) requires only ~2GB. You should identify the best computational resource for your use case keeping in mind smaller policies require less computational resources. 
The combination of policy and device used (CPU-intensive, using MPS, or the number of CUDA cores on a given NVIDIA GPU) directly impacts the average inference latency you should expect.
+2. **Adjust your `fps` based on inference latency.** While the server generates a new action chunk, the client is not idle and is stepping through its current action queue. If the two processes happen at fundamentally different speeds, the client might end up with an empty queue. As such, you should reduce your `fps` if you consistently run out of actions in the queue.
+3. **Adjust `chunk_size_threshold`**.
+   - Values closer to `0.0` result in almost sequential behavior, whereas values closer to `1.0` result in sending an observation at every step (using more bandwidth, and relying on a good world model).
+   - We found values around 0.5-0.6 to work well. If you want to tweak this, spin up a `RobotClient` with `--debug_visualize_queue_size` set to `True`. This will plot the action queue size evolution at runtime, and you can use it to find the value of `chunk_size_threshold` that works best for your setup.
+

+ +

+

+ + The action queue size is plotted at runtime when the + `--debug_visualize_queue_size` flag is passed, for various levels of + `chunk_size_threshold` (`g` in the SmolVLA paper). + +

+ +--- + +## Conclusion + +Asynchronous inference represents a significant advancement in real-time robotics control, addressing the fundamental challenge of inference latency that has long plagued robotics applications. Through this tutorial, you've learned how to implement a complete async inference pipeline that eliminates idle frames and enables smoother, more reactive robot behaviors. + +**Key Takeaways:** + +- **Paradigm Shift**: Async inference decouples action prediction from execution, allowing robots to continue acting while new action chunks are computed in parallel +- **Performance Benefits**: Eliminates "wait-for-inference" lags that are inherent in synchronous approaches, becoming increasingly important as policy models grow larger +- **Flexible Architecture**: The server-client design enables distributed computing, where inference can run on powerful remote hardware while maintaining real-time robot control +- **Tunable Parameters**: Success depends on properly configuring `actions_per_chunk` and `chunk_size_threshold` for your specific hardware, policy, and task requirements +- **Universal Compatibility**: Works with all LeRobot-supported policies, from lightweight ACT models to vision-language models like SmolVLA + +Start experimenting with the default parameters, monitor your action queue sizes, and iteratively refine your setup to achieve optimal performance for your specific use case. +If you want to discuss this further, hop into our [Discord community](https://discord.gg/s3KuuzsPFb), or open an issue on our [GitHub repository](https://github.com/huggingface/lerobot/issues). 
diff --git a/lerobot/docs/source/backwardcomp.mdx b/lerobot/docs/source/backwardcomp.mdx new file mode 100644 index 0000000000000000000000000000000000000000..3366c8ab916cbb98063ea89c0824d5a15a60e294 --- /dev/null +++ b/lerobot/docs/source/backwardcomp.mdx @@ -0,0 +1,151 @@ +# Backward compatibility + +## Policy Normalization Migration (PR #1452) + +**Breaking Change**: LeRobot policies no longer have built-in normalization layers embedded in their weights. Normalization is now handled by external `PolicyProcessorPipeline` components. + +### What changed? + +| | Before PR #1452 | After PR #1452 | +| -------------------------- | ------------------------------------------------ | ------------------------------------------------------------ | +| **Normalization Location** | Embedded in model weights (`normalize_inputs.*`) | External `PolicyProcessorPipeline` components | +| **Model State Dict** | Contains normalization statistics | **Clean weights only** - no normalization parameters | +| **Usage** | `policy(batch)` handles everything | `preprocessor(batch)` → `policy(...)` → `postprocessor(...)` | + +### Impact on existing models + +- Models trained **before** PR #1452 have normalization embedded in their weights +- These models need migration to work with the new `PolicyProcessorPipeline` system +- The migration extracts normalization statistics and creates separate processor pipelines + +### Migrating old models + +Use the migration script to convert models with embedded normalization: + +```shell +python src/lerobot/processor/migrate_policy_normalization.py \ + --pretrained-path lerobot/act_aloha_sim_transfer_cube_human \ + --push-to-hub \ + --branch migrated +``` + +The script: + +1. **Extracts** normalization statistics from model weights +2. **Creates** external preprocessor and postprocessor pipelines +3. **Removes** normalization layers from model weights +4. **Saves** clean model + processor pipelines +5. 
**Pushes** to Hub with automatic PR creation + +### Using migrated models + +```python +# New usage pattern (after migration) +from lerobot.policies.factory import make_policy, make_pre_post_processors + +# Load model and processors separately +policy = make_policy(config, ds_meta=dataset.meta) +preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=config, + dataset_stats=dataset.meta.stats +) + +# Process data through pipeline +processed_batch = preprocessor(raw_batch) +action = policy.select_action(processed_batch) +final_action = postprocessor(action) +``` + +## Hardware API redesign + +PR [#777](https://github.com/huggingface/lerobot/pull/777) improves the LeRobot calibration but is **not backward-compatible**. Below is a overview of what changed and how you can continue to work with datasets created before this pull request. + +### What changed? + +| | Before PR #777 | After PR #777 | +| --------------------------------- | ------------------------------------------------- | ------------------------------------------------------------ | +| **Joint range** | Degrees `-180...180°` | **Normalised range** Joints: `–100...100` Gripper: `0...100` | +| **Zero position (SO100 / SO101)** | Arm fully extended horizontally | **In middle of the range for each joint** | +| **Boundary handling** | Software safeguards to detect ±180 ° wrap-arounds | No wrap-around logic needed due to mid-range zero | + +--- + +### Impact on existing datasets + +- Recorded trajectories created **before** PR #777 will replay incorrectly if loaded directly: + - Joint angles are offset and incorrectly normalized. +- Any models directly finetuned or trained on the old data will need their inputs and outputs converted. + +### Using datasets made with the previous calibration system + +We provide a migration example script for replaying an episode recorded with the previous calibration here: `examples/backward_compatibility/replay.py`. 
+Below we take you through the modifications made in the example script to make datasets recorded with the previous calibration work.
+
+```diff
++ key = f"{name.removeprefix('main_')}.pos"
+ action[key] = action_array[i].item()
++ action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
++ action["elbow_flex.pos"] -= 90
+```
+
+Let's break this down.
+The new codebase uses the `.pos` suffix for position observations, and the `main_` prefix has been removed:
+
+
+```python
+key = f"{name.removeprefix('main_')}.pos"
+```
+
+
+For `"shoulder_lift"` (id = 2), the 0 position is changed by -90 degrees and the direction is reversed compared to old calibration/code.
+
+
+```python
+action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90)
+```
+
+
+For `"elbow_flex"` (id = 3), the 0 position is changed by -90 degrees compared to old calibration/code.
+
+
+```python
+action["elbow_flex.pos"] -= 90
+```
+
+
+To use degrees normalization, we then set the `--robot.use_degrees` option to `true`.
+
+```diff
+python examples/backward_compatibility/replay.py \
+    --robot.type=so101_follower \
+    --robot.port=/dev/tty.usbmodem5A460814411 \
+    --robot.id=blue \
++   --robot.use_degrees=true \
+    --dataset.repo_id=my_dataset_id \
+    --dataset.episode=0
+```
+
+### Using policies trained with the previous calibration system
+
+Policies output actions in the same format as the datasets (`torch.Tensors`). Therefore, the same transformations should be applied.
+
+To find these transformations, we recommend first replaying an episode of the dataset your policy was trained on, using the section above.
+Then, add these same transformations on your inference script (shown here in the `record.py` script): + +```diff +action_values = predict_action( + observation_frame, + policy, + get_safe_torch_device(policy.config.device), + policy.config.use_amp, + task=single_task, + robot_type=robot.robot_type, + ) + action = {key: action_values[i].item() for i, key in enumerate(robot.action_features)} + ++ action["shoulder_lift.pos"] = -(action["shoulder_lift.pos"] - 90) ++ action["elbow_flex.pos"] -= 90 + robot.send_action(action) +``` + +If you have questions or run into migration issues, feel free to ask them on [Discord](https://discord.gg/s3KuuzsPFb) diff --git a/lerobot/docs/source/bring_your_own_policies.mdx b/lerobot/docs/source/bring_your_own_policies.mdx new file mode 100644 index 0000000000000000000000000000000000000000..9266c9e5bb667cfa2b822904122167db9c950286 --- /dev/null +++ b/lerobot/docs/source/bring_your_own_policies.mdx @@ -0,0 +1,175 @@ +# Bring Your Own Policies + +This tutorial explains how to integrate your own custom policy implementations into the LeRobot ecosystem, allowing you to leverage all LeRobot tools for training, evaluation, and deployment while using your own algorithms. + +## Step 1: Create a Policy Package + +Your custom policy should be organized as an installable Python package following LeRobot's plugin conventions. + +### Package Structure + +Create a package with the prefix `lerobot_policy_` (IMPORTANT!) 
followed by your policy name: + +```bash +lerobot_policy_my_custom_policy/ +├── pyproject.toml +└── src/ + └── lerobot_policy_my_custom_policy/ + ├── __init__.py + ├── configuration_my_custom_policy.py + ├── modeling_my_custom_policy.py + └── processor_my_custom_policy.py +``` + +### Package Configuration + +Set up your `pyproject.toml`: + +```toml +[project] +name = "lerobot_policy_my_custom_policy" +version = "0.1.0" +dependencies = [ + # your policy-specific dependencies +] +requires-python = ">= 3.12" + +[build-system] +build-backend = # your-build-backend +requires = # your-build-system +``` + +## Step 2: Define the Policy Configuration + +Create a configuration class that inherits from `PreTrainedConfig` and registers your policy type: + +```python +# configuration_my_custom_policy.py +from dataclasses import dataclass, field +from lerobot.configs.policies import PreTrainedConfig +from lerobot.configs.types import NormalizationMode + +@PreTrainedConfig.register_subclass("my_custom_policy") +@dataclass +class MyCustomPolicyConfig(PreTrainedConfig): + """Configuration class for MyCustomPolicy. + + Args: + n_obs_steps: Number of observation steps to use as input + horizon: Action prediction horizon + n_action_steps: Number of action steps to execute + hidden_dim: Hidden dimension for the policy network + # Add your policy-specific parameters here + """ + # ...PreTrainedConfig fields... 
+ pass + + def __post_init__(self): + super().__post_init__() + # Add any validation logic here + + def validate_features(self) -> None: + """Validate input/output feature compatibility.""" + # Implement validation logic for your policy's requirements + pass +``` + +## Step 3: Implement the Policy Class + +Create your policy implementation by inheriting from LeRobot's base `PreTrainedPolicy` class: + +```python +# modeling_my_custom_policy.py +import torch +import torch.nn as nn +from typing import Any + +from lerobot.policies.pretrained import PreTrainedPolicy +from .configuration_my_custom_policy import MyCustomPolicyConfig + +class MyCustomPolicy(PreTrainedPolicy): + config_class = MyCustomPolicyConfig + name = "my_custom_policy" + + def __init__(self, config: MyCustomPolicyConfig, dataset_stats: dict[str, Any] = None): + super().__init__(config, dataset_stats) + ... +``` + +## Step 4: Add Data Processors + +Create processor functions: + +```python +# processor_my_custom_policy.py +from typing import Any +import torch + + +def make_my_custom_policy_pre_post_processors( + config, +) -> tuple[ + PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], + PolicyProcessorPipeline[PolicyAction, PolicyAction], +]: + """Create preprocessing and postprocessing functions for your policy.""" + pass # Define your preprocessing and postprocessing logic here + +``` + +## Step 5: Package Initialization + +Expose your classes in the package's `__init__.py`: + +```python +# __init__.py +"""Custom policy package for LeRobot.""" + +try: + import lerobot # noqa: F401 +except ImportError: + raise ImportError( + "lerobot is not installed. Please install lerobot to use this policy package." 
+ ) + +from .configuration_my_custom_policy import MyCustomPolicyConfig +from .modeling_my_custom_policy import MyCustomPolicy +from .processor_my_custom_policy import make_my_custom_policy_pre_post_processors + +__all__ = [ + "MyCustomPolicyConfig", + "MyCustomPolicy", + "make_my_custom_policy_pre_post_processors", +] +``` + +## Step 6: Installation and Usage + +### Install Your Policy Package + +```bash +cd lerobot_policy_my_custom_policy +pip install -e . + +# Or install from PyPI if published +pip install lerobot_policy_my_custom_policy +``` + +### Use Your Policy + +Once installed, your policy automatically integrates with LeRobot's training and evaluation tools: + +```bash +lerobot-train \ + --policy.type my_custom_policy \ + --env.type pusht \ + --steps 200000 +``` + +## Examples and Community Contributions + +Check out these example policy implementations: + +- [DiTFlow Policy](https://github.com/danielsanjosepro/lerobot_policy_ditflow) - Diffusion Transformer policy with flow-matching objective. Try it out in this example: [DiTFlow Example](https://github.com/danielsanjosepro/test_lerobot_policy_ditflow) + +Share your policy implementations with the community! 🤗 diff --git a/lerobot/docs/source/cameras.mdx b/lerobot/docs/source/cameras.mdx new file mode 100644 index 0000000000000000000000000000000000000000..8af0f5ae52c2b78fc853b3ec8c9d858766e0b046 --- /dev/null +++ b/lerobot/docs/source/cameras.mdx @@ -0,0 +1,220 @@ +# Cameras + +LeRobot offers multiple options for video capture: + +| Class | Supported Cameras | +| ----------------- | ----------------------------------- | +| `OpenCVCamera` | Phone, built-in laptop, USB webcams | +| `ZMQCamera` | Network-connected cameras | +| `RealSenseCamera` | Intel RealSense (with depth) | +| `Reachy2Camera` | Reachy 2 robot cameras | + +> [!TIP] +> For `OpenCVCamera` compatibility details, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html). 
+ +### Find your camera + +Every camera requires a unique identifier to be instantiated, allowing you to distinguish between multiple connected devices. + +`OpenCVCamera` and `RealSenseCamera` support auto-discovery. Run the command below to list available devices and their identifiers. Note that these identifiers may change after rebooting your computer or re-plugging the camera, depending on your operating system. + +```bash +lerobot-find-cameras opencv # or realsense for Intel Realsense cameras +``` + +The output will look something like this if you have two cameras connected: + +```bash +--- Detected Cameras --- +Camera #0: + Name: OpenCV Camera @ 0 + Type: OpenCV + Id: 0 + Backend api: AVFOUNDATION + Default stream profile: + Format: 16.0 + Width: 1920 + Height: 1080 + Fps: 15.0 +-------------------- +(more cameras ...) +``` + +> [!WARNING] +> When using Intel RealSense cameras in `macOS`, you could get this [error](https://github.com/IntelRealSense/librealsense/issues/12307): `Error finding RealSense cameras: failed to set power state`, this can be solved by running the same command with `sudo` permissions. Note that using RealSense cameras in `macOS` is unstable. + +`ZMQCamera` and `Reachy2Camera` do not support auto-discovery. They must be configured manually by providing their network address and port or robot SDK settings. + +## Use cameras + +### Frame access modes + +All camera classes implement three access modes for capturing frames: + +| Method | Behavior | Blocks? | Best For | +| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ---------------------------------------- | +| `read()` | Waits for the camera hardware to return a frame. May block for a long time depending on the camera and SDK. 
| Yes | Simple scripts, sequential capture | +| `async_read(timeout_ms)` | Returns the latest unconsumed frame from background thread. Blocks only if buffer is empty, up to `timeout_ms`. Raises `TimeoutError` if no frame arrives. | With a timeout | Control loops synchronized to camera FPS | +| `read_latest(max_age_ms)` | Peeks at the most recent frame in buffer (may be stale). Raises `TimeoutError` if frame is older than `max_age_ms`. | No | UI visualization, logging, monitoring | + +### Usage examples + +The following examples show how to use the camera API to configure and capture frames from different camera types. + +- **Blocking and non-blocking frame capture** using an OpenCV-based camera +- **Color and depth capture** using an Intel RealSense camera + +> [!WARNING] +> Failing to cleanly disconnect cameras can cause resource leaks. Use the context manager protocol to ensure automatic cleanup: +> +> ```python +> with OpenCVCamera(config) as camera: +> ... +> ``` +> +> You can also call `connect()` and `disconnect()` manually, but always use a `finally` block for the latter. + + + + + +```python +from lerobot.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.cameras.opencv.camera_opencv import OpenCVCamera +from lerobot.cameras.configs import ColorMode, Cv2Rotation + +# Construct an `OpenCVCameraConfig` with your desired FPS, resolution, color mode, and rotation. +config = OpenCVCameraConfig( + index_or_path=0, + fps=15, + width=1920, + height=1080, + color_mode=ColorMode.RGB, + rotation=Cv2Rotation.NO_ROTATION +) + +# Instantiate and connect an `OpenCVCamera`, performing a warm-up read (default). 
+with OpenCVCamera(config) as camera: + + # Read a frame synchronously — blocks until hardware delivers a new frame + frame = camera.read() + print(f"read() call returned frame with shape:", frame.shape) + + # Read a frame asynchronously with a timeout — returns the latest unconsumed frame or waits up to timeout_ms for a new one + try: + for i in range(10): + frame = camera.async_read(timeout_ms=200) + print(f"async_read call returned frame {i} with shape:", frame.shape) + except TimeoutError as e: + print(f"No frame received within timeout: {e}") + + # Instantly return a frame - returns the most recent frame captured by the camera + try: + initial_frame = camera.read_latest(max_age_ms=1000) + for i in range(10): + frame = camera.read_latest(max_age_ms=1000) + print(f"read_latest call returned frame {i} with shape:", frame.shape) + print(f"Was a new frame received by the camera? {not (initial_frame == frame).any()}") + except TimeoutError as e: + print(f"Frame too old: {e}") + +``` + + + + + + +```python +from lerobot.cameras.realsense.configuration_realsense import RealSenseCameraConfig +from lerobot.cameras.realsense.camera_realsense import RealSenseCamera +from lerobot.cameras.configs import ColorMode, Cv2Rotation + +# Create a `RealSenseCameraConfig` specifying your camera’s serial number and enabling depth. +config = RealSenseCameraConfig( + serial_number_or_name="233522074606", + fps=15, + width=640, + height=480, + color_mode=ColorMode.RGB, + use_depth=True, + rotation=Cv2Rotation.NO_ROTATION +) + +# Instantiate and connect a `RealSenseCamera` with warm-up read (default). +camera = RealSenseCamera(config) +camera.connect() + +# Capture a color frame via `read()` and a depth map via `read_depth()`. 
+try: + color_frame = camera.read() + depth_map = camera.read_depth() + print("Color frame shape:", color_frame.shape) + print("Depth map shape:", depth_map.shape) +finally: + camera.disconnect() +``` + + + + + +## Use your phone's camera + + + + +To use your iPhone as a camera on macOS, enable the Continuity Camera feature: + +- Ensure your Mac is running macOS 13 or later, and your iPhone is on iOS 16 or later. +- Sign in both devices with the same Apple ID. +- Connect your devices with a USB cable or turn on Wi-Fi and Bluetooth for a wireless connection. + +For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac). + + + + +If you want to use your phone as a camera using OBS, follow these steps to set up a virtual camera. + +1. _(Linux only) Install `v4l2loopback-dkms` and `v4l-utils`_. These packages create virtual camera devices and verify their settings. Install with: + +```bash +sudo apt install v4l2loopback-dkms v4l-utils +``` + +2. _Install the [DroidCam app](https://droidcam.app) on your phone_. This app is available for both iOS and Android. +3. _Download and install [OBS Studio](https://obsproject.com)_. +4. _Download and install the [DroidCam OBS plugin](https://droidcam.app/obs)_. +5. _Start OBS Studio_. + +6. _Add your phone as a source_. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480` to avoid the watermarks. +7. _Adjust resolution settings_. In OBS Studio, go to `File > Settings > Video` or `OBS > Preferences... > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it. +8. _Start virtual camera_. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide). +9. _Verify the virtual camera setup and resolution_. 
+ - **Linux**: Use `v4l2-ctl` to list devices and check resolution: + ```bash + v4l2-ctl --list-devices # find VirtualCam and note its /dev/videoX path + v4l2-ctl -d /dev/videoX --get-fmt-video # replace with your VirtualCam path + ``` + You should see `VirtualCam` listed and resolution `640x480`. + - **macOS**: Open Photo Booth or FaceTime and select "OBS Virtual Camera" as the input. + - **Windows**: The native Camera app doesn't support virtual cameras. Use a video conferencing app (Zoom, Teams) or run `lerobot-find-cameras opencv` directly to verify. + +
+Troubleshooting + +> The virtual camera resolution is incorrect. + +Delete the virtual camera source and recreate it. The resolution cannot be changed after creation. + +> Error reading frame in background thread for OpenCVCamera(X): OpenCVCamera(X) frame width=640 or height=480 do not match configured width=1920 or height=1080. + +This error is caused by OBS Virtual Camera advertising a `1920x1080` resolution despite rescaling. The only fix for now is to comment out the width and height check in `_postprocess_image()`. + +
+ +
+
+ +If everything is set up correctly, your phone will appear as a standard OpenCV camera and can be used with `OpenCVCamera`. diff --git a/lerobot/docs/source/contributing.md b/lerobot/docs/source/contributing.md new file mode 120000 index 0000000000000000000000000000000000000000..f939e75f21a8badb5c40f527abd0e098fe9bc472 --- /dev/null +++ b/lerobot/docs/source/contributing.md @@ -0,0 +1 @@ +../../CONTRIBUTING.md \ No newline at end of file diff --git a/lerobot/docs/source/damiao.mdx b/lerobot/docs/source/damiao.mdx new file mode 100644 index 0000000000000000000000000000000000000000..45388ab9b815b1b0821b9e25b45b63d20091bb6d --- /dev/null +++ b/lerobot/docs/source/damiao.mdx @@ -0,0 +1,165 @@ +# Damiao Motors and CAN Bus + +This guide covers setup and usage of Damiao motors with LeRobot via CAN bus communication. + +Currently, only Linux is supported, as the OpenArms CAN adapter only has drivers for Linux. + +## Linux CAN Setup + +Before using Damiao motors, you need to set up the CAN interface on your Linux system. 
+ +### Install CAN Utilities + +```bash +sudo apt-get install can-utils +``` + +### Configure CAN Interface (Manual) + +For standard CAN FD (recommended for OpenArms): + +```bash +sudo ip link set can0 down +sudo ip link set can0 type can bitrate 1000000 dbitrate 5000000 fd on +sudo ip link set can0 up +``` + +For standard CAN (without FD): + +```bash +sudo ip link set can0 down +sudo ip link set can0 type can bitrate 1000000 +sudo ip link set can0 up +``` + +### Configure CAN Interface (Using LeRobot) + +LeRobot provides a utility script to setup and test CAN interfaces: + +```bash +# Setup multiple interfaces (e.g., OpenArms Followers with 2 CAN buses) +lerobot-setup-can --mode=setup --interfaces=can0,can1 +``` + +## Debugging CAN Communication + +Use the built-in debug tools to test motor communication: + +```bash +# Test motors on all interfaces +lerobot-setup-can --mode=test --interfaces=can0,can1 + +# Run speed/latency test +lerobot-setup-can --mode=speed --interfaces=can0 +``` + +The test mode will scan for motors (IDs 0x01-0x08) and report which ones respond. Example output: + +``` +can0: UP (CAN FD) + Motor 0x01 (joint_1): ✓ FOUND + → Response 0x11 [FD]: 00112233... + Motor 0x02 (joint_2): ✓ FOUND + Motor 0x03 (joint_3): ✗ No response + ... 
+ Summary: 2/8 motors found +``` + +## Usage + +### Basic Setup + +```python +from lerobot.motors import Motor +from lerobot.motors.damiao import DamiaoMotorsBus + +# Define your motors with send/receive CAN IDs +motors = { + "joint_1": Motor(id=0x01, motor_type_str="dm8009", recv_id=0x11), + "joint_2": Motor(id=0x02, motor_type_str="dm4340", recv_id=0x12), + "joint_3": Motor(id=0x03, motor_type_str="dm4310", recv_id=0x13), +} + +# Create the bus +bus = DamiaoMotorsBus( + port="can0", # Linux socketcan interface + motors=motors, +) + +# Connect +bus.connect() +``` + +### Reading Motor States + +```python +# Read single motor position (degrees) +position = bus.read("Present_Position", "joint_1") + +# Read from multiple motors +positions = bus.sync_read("Present_Position") # All motors +positions = bus.sync_read("Present_Position", ["joint_1", "joint_2"]) + +# Read all states at once (position, velocity, torque) +states = bus.sync_read_all_states() +# Returns: {'joint_1': {'position': 45.2, 'velocity': 1.3, 'torque': 0.5}, ...} +``` + +### Writing Motor Commands + +```python +# Enable torque +bus.enable_torque() + +# Set goal position (degrees) +bus.write("Goal_Position", "joint_1", 45.0) + +# Set positions for multiple motors +bus.sync_write("Goal_Position", { + "joint_1": 45.0, + "joint_2": -30.0, + "joint_3": 90.0, +}) + +# Disable torque +bus.disable_torque() +``` + +## Configuration Options + +| Parameter | Default | Description | +| -------------- | --------- | ----------------------------------------------------------- | +| `port` | - | CAN interface (`can0`) or serial port (`/dev/cu.usbmodem*`) | +| `use_can_fd` | `True` | Enable CAN FD for higher data rates | +| `bitrate` | `1000000` | Nominal bitrate (1 Mbps) | +| `data_bitrate` | `5000000` | CAN FD data bitrate (5 Mbps) | + +## Motor Configuration + +Each motor requires: + +- `id`: CAN ID for sending commands +- `motor_type`: One of the supported motor types (e.g., `"dm8009"`, `"dm4340"`) +- `recv_id`: CAN 
ID for receiving responses + +OpenArms default IDs follow the pattern: send ID `0x0N`, receive ID `0x1N` where N is the joint number. + +## Troubleshooting + +### No Response from Motors + +1. **Check power** +2. **Verify CAN wiring**: Check CAN-H, CAN-L, and GND connections +3. **Check motor IDs**: Use Damiao Debugging Tools to verify/configure IDs +4. **Test CAN interface**: Run `candump can0` to see if messages are being received +5. **Run diagnostics**: `lerobot-setup-can --mode=test --interfaces=can0` + +### Motor Timeout Parameter + +If motors were configured with timeout=0, they won't respond to commands. Use Damiao Debugging Tools to set a non-zero timeout value. + +### Verify CAN FD Status + +```bash +ip -d link show can0 | grep fd +``` diff --git a/lerobot/docs/source/dataset_subtask.mdx b/lerobot/docs/source/dataset_subtask.mdx new file mode 100644 index 0000000000000000000000000000000000000000..beb5d80bd81c67ecefe8b0e659b3687a2fc31957 --- /dev/null +++ b/lerobot/docs/source/dataset_subtask.mdx @@ -0,0 +1,278 @@ +# Using Subtasks in LeRobot Datasets + +Subtask support in robotics datasets has proven effective in improving robot reasoning and understanding. Subtasks are particularly useful for: + +- **Hierarchical policies**: Building policies that include subtask predictions to visualize robot reasoning in real time +- **Reward modeling**: Helping reward models understand task progression (e.g., SARM-style stage-aware reward models) +- **Task decomposition**: Breaking down complex manipulation tasks into atomic, interpretable steps + +LeRobotDataset now supports subtasks as part of its dataset structure, alongside tasks. + +## What are Subtasks? + +While a **task** describes the overall goal (e.g., "Pick up the apple and place it in the basket"), **subtasks** break down the execution into finer-grained steps: + +1. "Approach the apple" +2. "Grasp the apple" +3. "Lift the apple" +4. "Move to basket" +5. 
"Release the apple" + +Each frame in the dataset can be annotated with its corresponding subtask, enabling models to learn and predict these intermediate stages. + +An overview of subtask annotation showing how frames are labeled with intermediate subtask stages + +

+ Figure: Overview of subtask annotation, showing how each frame is labeled with its corresponding intermediate subtask stage.

+ +**Reference:** _Subtask-learning based for robot self-assembly in flexible collaborative assembly in manufacturing_, Original Article, Published: 19 April 2022. + +## Dataset Structure + +Subtask information is stored in the dataset metadata: + +``` +my-dataset/ +├── data/ +│ └── ... +├── meta/ +│ ├── info.json +│ ├── stats.json +│ ├── tasks.parquet +│ ├── subtasks.parquet # Subtask index → subtask string mapping +│ └── episodes/ +│ └── ... +└── videos/ + └── ... +``` + +### Subtasks Parquet File + +The `meta/subtasks.parquet` file maps subtask indices to their natural language descriptions: + +| subtask_index | subtask (index column) | +| ------------- | ---------------------- | +| 0 | "Approach the apple" | +| 1 | "Grasp the apple" | +| 2 | "Lift the apple" | +| ... | ... | + +### Frame-Level Annotations + +Each frame in the dataset can include a `subtask_index` field that references the subtasks parquet file: + +```python +# Example frame data in the parquet file +{ + "index": 42, + "timestamp": 1.4, + "episode_index": 0, + "task_index": 0, + "subtask_index": 2, # References "Lift the apple" + "observation.state": [...], + "action": [...], +} +``` + +## Annotating Datasets with Subtasks + +We provide a HuggingFace Space for easily annotating any LeRobotDataset with subtasks: + +**[https://huggingface.co/spaces/lerobot/annotate](https://huggingface.co/spaces/lerobot/annotate)** + +After completing your annotation: + +1. Click "Push to Hub" to upload your annotated dataset +2. 
You can also run the annotation space locally by following the instructions at [github.com/huggingface/lerobot-annotate](https://github.com/huggingface/lerobot-annotate) + +## Loading Datasets with Subtasks + +When you load a dataset with subtask annotations, the subtask information is automatically available: + +```python +from lerobot.datasets.lerobot_dataset import LeRobotDataset + +# Load a dataset with subtask annotations +dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated") + +# Access a sample +sample = dataset[100] + +# The sample includes both task and subtask information +print(sample["task"]) # "Collect the fruit" +print(sample["subtask"]) # "Grasp the apple" +print(sample["task_index"]) # tensor(0) +print(sample["subtask_index"]) # tensor(2) +``` + +### Checking for Subtask Support + +You can check if a dataset has subtask annotations: + +```python +# Check if subtasks are available +has_subtasks = ( + "subtask_index" in dataset.features + and dataset.meta.subtasks is not None +) + +if has_subtasks: + print(f"Dataset has {len(dataset.meta.subtasks)} unique subtasks") + print("Subtasks:", list(dataset.meta.subtasks.index)) +``` + +## Using Subtasks for Training + +### With the Tokenizer Processor + +The `TokenizerProcessor` automatically handles subtask tokenization for Vision-Language Action (VLA) models: + +```python +from lerobot.processor.tokenizer_processor import TokenizerProcessor +from lerobot.processor.pipeline import ProcessorPipeline + +# Create a tokenizer processor +tokenizer_processor = TokenizerProcessor( + tokenizer_name_or_path="google/paligemma-3b-pt-224", + padding="max_length", + max_length=64, +) + +# The processor will automatically tokenize subtasks if present in the batch +# and add them to the observation under: +# - "observation.subtask.tokens" +# - "observation.subtask.attention_mask" +``` + +When subtasks are available in the batch, the tokenizer processor adds: + +- `observation.subtask.tokens`: Tokenized subtask 
text +- `observation.subtask.attention_mask`: Attention mask for the subtask tokens + +### DataLoader with Subtasks + +```python +import torch +from lerobot.datasets.lerobot_dataset import LeRobotDataset + +dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated") + +dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=16, + shuffle=True, +) + +for batch in dataloader: + # Access subtask information in the batch + subtasks = batch["subtask"] # List of subtask strings + subtask_indices = batch["subtask_index"] # Tensor of subtask indices + + # Use for training hierarchical policies or reward models + print(f"Batch subtasks: {set(subtasks)}") +``` + +## Example Datasets with Subtask Annotations + +Try loading a dataset with subtask annotations: + +```python +from lerobot.datasets.lerobot_dataset import LeRobotDataset + +# Example dataset with subtask annotations +dataset = LeRobotDataset("jadechoghari/collect-fruit-annotated") + +# Explore the subtasks +print("Available subtasks:") +for subtask_name in dataset.meta.subtasks.index: + print(f" - {subtask_name}") + +# Get subtask distribution +subtask_counts = {} +for i in range(len(dataset)): + sample = dataset[i] + subtask = sample["subtask"] + subtask_counts[subtask] = subtask_counts.get(subtask, 0) + 1 + +print("\nSubtask distribution:") +for subtask, count in sorted(subtask_counts.items(), key=lambda x: -x[1]): + print(f" {subtask}: {count} frames") +``` + +## Use Cases + +### 1. Hierarchical Policy Training + +Train policies that predict both actions and current subtask: + +```python +class HierarchicalPolicy(nn.Module): + def __init__(self, num_subtasks): + super().__init__() + self.action_head = nn.Linear(hidden_dim, action_dim) + self.subtask_head = nn.Linear(hidden_dim, num_subtasks) + + def forward(self, observations): + features = self.encoder(observations) + actions = self.action_head(features) + subtask_logits = self.subtask_head(features) + return actions, subtask_logits +``` + +### 2. 
Stage-Aware Reward Modeling (SARM) + +Build reward models that understand task progression: + +```python +# SARM predicts: +# - Stage: Which subtask is being executed (discrete) +# - Progress: How far along the subtask (continuous 0-1) + +class SARMRewardModel(nn.Module): + def forward(self, observations): + features = self.encoder(observations) + stage_logits = self.stage_classifier(features) + progress = self.progress_regressor(features) + return stage_logits, progress +``` + +### 3. Progress Visualization + +Monitor robot execution by tracking subtask progression: + +```python +def visualize_execution(model, observations): + for t, obs in enumerate(observations): + action, subtask_logits = model(obs) + predicted_subtask = subtask_names[subtask_logits.argmax()] + print(f"t={t}: Executing '{predicted_subtask}'") +``` + +## API Reference + +### LeRobotDataset Properties + +| Property | Type | Description | +| --------------------------- | ---------------------- | ------------------------------------------ | +| `meta.subtasks` | `pd.DataFrame \| None` | DataFrame mapping subtask names to indices | +| `features["subtask_index"]` | `dict` | Feature spec for subtask_index if present | + +### Sample Keys + +When subtasks are available, each sample includes: + +| Key | Type | Description | +| --------------- | -------------- | ------------------------------------ | +| `subtask_index` | `torch.Tensor` | Integer index of the current subtask | +| `subtask` | `str` | Natural language subtask description | + +## Related Resources + +- [SARM Paper](https://arxiv.org/pdf/2509.25358) - Stage-Aware Reward Modeling for Long Horizon Robot Manipulation +- [LeRobot Annotate Space](https://huggingface.co/spaces/lerobot/annotate) - Interactive annotation tool +- [LeRobotDataset v3.0](./lerobot-dataset-v3) - Dataset format documentation diff --git a/lerobot/docs/source/debug_processor_pipeline.mdx b/lerobot/docs/source/debug_processor_pipeline.mdx new file mode 100644 index 
0000000000000000000000000000000000000000..4826c947ecc2df9a7dfc5184d5847cf1029ec63c --- /dev/null +++ b/lerobot/docs/source/debug_processor_pipeline.mdx @@ -0,0 +1,299 @@ +# Debug Your Processor Pipeline + +Processor pipelines can be complex, especially when chaining multiple transformation steps. +Unlike simple function calls, pipelines lack natural observability, you can't easily see what happens +between each step or where things go wrong. +This guide provides debugging tools and techniques specifically designed to address these challenges +and help you understand data flow through your pipelines. + +We'll explore three complementary debugging approaches: **hooks** for runtime monitoring, **step-through debugging** for detailed inspection, and **feature validation** for catching structural mismatches. Each serves a different purpose and together they provide complete visibility into your pipeline's behavior. + +## Understanding Hooks + +Hooks are functions that get called at specific points during pipeline execution. +They provide a way to inspect, monitor, or modify data without changing your pipeline code. +Think of them as "event listeners" for your pipeline. + +### What is a Hook? + +A hook is a callback function that gets automatically invoked at specific moments during pipeline execution. +The concept comes from event-driven programming, imagine you could "hook into" the pipeline's execution flow to observe or react to what's happening. + +Think of hooks like inserting checkpoints into your pipeline. Every time the pipeline reaches one of these checkpoints, it pauses briefly to call your hook function, giving you a chance to inspect the current state, log information, and validate data. + +A hook is simply a function that accepts two parameters: + +- `step_idx: int` - The index of the current processing step (0, 1, 2, etc.) 
+- `transition: EnvTransition` - The data transition at that point in the pipeline + +The beauty of hooks is their non-invasive nature: you can add monitoring, validation, or debugging logic without changing a single line of your pipeline code. The pipeline remains clean and focused on its core logic, while hooks handle the cross-cutting concerns like logging, monitoring, and debugging. + +### Before vs After Hooks + +The pipeline supports two types of hooks: + +- **Before hooks** (`register_before_step_hook`) - Called before each step executes +- **After hooks** (`register_after_step_hook`) - Called after each step completes + +```python +def before_hook(step_idx: int, transition: EnvTransition): + """Called before step processes the transition.""" + print(f"About to execute step {step_idx}") + # Useful for: logging, validation, setup + +def after_hook(step_idx: int, transition: EnvTransition): + """Called after step has processed the transition.""" + print(f"Completed step {step_idx}") + # Useful for: monitoring results, cleanup, debugging + +processor.register_before_step_hook(before_hook) +processor.register_after_step_hook(after_hook) +``` + +### Implementing a NaN Detection Hook + +Here's a practical example of a hook that detects NaN values: + +```python +def check_nans(step_idx: int, transition: EnvTransition): + """Check for NaN values in observations.""" + obs = transition.get(TransitionKey.OBSERVATION) + if obs: + for key, value in obs.items(): + if isinstance(value, torch.Tensor) and torch.isnan(value).any(): + print(f"NaN detected in {key} at step {step_idx}") + +# Register the hook to run after each step +processor.register_after_step_hook(check_nans) + +# Process your data - the hook will be called automatically +output = processor(input_data) + +# Remove the hook when done debugging +processor.unregister_after_step_hook(check_nans) +``` + +### How Hooks Work Internally + +Understanding the internal mechanism helps you use hooks more effectively. 
The pipeline maintains two separate lists: one for before-step hooks and another for after-step hooks. When you register a hook, it's simply appended to the appropriate list. + +During execution, the pipeline follows a strict sequence: for each processing step, it first calls all before-hooks in registration order, then executes the actual step transformation, and finally calls all after-hooks in registration order. This creates a predictable, sandwich-like structure around each step. + +The key insight is that hooks don't change the core pipeline logic—they're purely additive. The pipeline's `_forward` method orchestrates this dance between hooks and processing steps, ensuring that your debugging or monitoring code runs at exactly the right moments without interfering with the main data flow. + +Here's a simplified view of how the pipeline executes hooks: + +```python +class DataProcessorPipeline: + def __init__(self): + self.steps = [...] + self.before_step_hooks = [] # List of before hooks + self.after_step_hooks = [] # List of after hooks + + def _forward(self, transition): + """Internal method that processes the transition through all steps.""" + for step_idx, processor_step in enumerate(self.steps): + # 1. Call all BEFORE hooks + for hook in self.before_step_hooks: + hook(step_idx, transition) + + # 2. Execute the actual processing step + transition = processor_step(transition) + + # 3. Call all AFTER hooks + for hook in self.after_step_hooks: + hook(step_idx, transition) + + return transition + + def register_before_step_hook(self, hook_fn): + self.before_step_hooks.append(hook_fn) + + def register_after_step_hook(self, hook_fn): + self.after_step_hooks.append(hook_fn) +``` + +### Execution Flow + +The execution flow looks like this: + +``` +Input → Before Hook → Step 0 → After Hook → Before Hook → Step 1 → After Hook → ... 
→ Output +``` + +For example, with 3 steps and both hook types: + +```python +def timing_before(step_idx, transition): + print(f"⏱️ Starting step {step_idx}") + +def validation_after(step_idx, transition): + print(f"✅ Completed step {step_idx}") + +processor.register_before_step_hook(timing_before) +processor.register_after_step_hook(validation_after) + +# This will output: +# ⏱️ Starting step 0 +# ✅ Completed step 0 +# ⏱️ Starting step 1 +# ✅ Completed step 1 +# ⏱️ Starting step 2 +# ✅ Completed step 2 +``` + +### Multiple Hooks + +You can register multiple hooks of the same type - they execute in the order registered: + +```python +def log_shapes(step_idx: int, transition: EnvTransition): + obs = transition.get(TransitionKey.OBSERVATION) + if obs: + print(f"Step {step_idx} observation shapes:") + for key, value in obs.items(): + if isinstance(value, torch.Tensor): + print(f" {key}: {value.shape}") + +processor.register_after_step_hook(check_nans) # Executes first +processor.register_after_step_hook(log_shapes) # Executes second + +# Both hooks will be called after each step in registration order +output = processor(input_data) +``` + +While hooks are excellent for monitoring specific issues (like NaN detection) or gathering metrics during normal pipeline execution, sometimes you need to dive deeper. When you want to understand exactly what happens at each step or debug complex transformation logic, step-through debugging provides the detailed inspection you need. + +## Step-Through Debugging + +Step-through debugging is like having a slow-motion replay for your pipeline. Instead of watching your data get transformed in one quick blur from input to output, you can pause and examine what happens after each individual step. + +This approach is particularly valuable when you're trying to understand a complex pipeline, debug unexpected behavior, or verify that each transformation is working as expected. 
Unlike hooks, which are great for automated monitoring, step-through debugging gives you manual, interactive control over the inspection process. + +The `step_through()` method is a generator that yields the transition state after each processing step, allowing you to inspect intermediate results. Think of it as creating a series of snapshots of your data as it flows through the pipeline—each snapshot shows you exactly what your data looks like after one more transformation has been applied. + +### How Step-Through Works + +The `step_through()` method fundamentally changes how the pipeline executes. Instead of running all steps in sequence and only returning the final result, it transforms the pipeline into an iterator that yields intermediate results. + +Here's what happens internally: the method starts by converting your input data into the pipeline's internal transition format, then yields this initial state. Next, it applies the first processing step and yields the result. Then it applies the second step to that result and yields again, and so on. Each `yield` gives you a complete snapshot of the transition at that point. + +This generator pattern is powerful because it's lazy—the pipeline only computes the next step when you ask for it. This means you can stop at any point, inspect the current state thoroughly, and decide whether to continue. You're not forced to run the entire pipeline just to debug one problematic step. 
+ +Instead of running the entire pipeline and only seeing the final result, `step_through()` pauses after each step and gives you the intermediate transition: + +```python +# This creates a generator that yields intermediate states +for i, intermediate_result in enumerate(processor.step_through(input_data)): + print(f"=== After step {i} ===") + + # Inspect the observation at this stage + obs = intermediate_result.get(TransitionKey.OBSERVATION) + if obs: + for key, value in obs.items(): + if isinstance(value, torch.Tensor): + print(f"{key}: shape={value.shape}, dtype={value.dtype}") +``` + +### Interactive Debugging with Breakpoints + +You can add breakpoints in the step-through loop to interactively debug: + +```python +# Step through the pipeline with debugging +for i, intermediate in enumerate(processor.step_through(data)): + print(f"Step {i}: {processor.steps[i].__class__.__name__}") + + # Set a breakpoint to inspect the current state + breakpoint() # Debugger will pause here + + # You can now inspect 'intermediate' in the debugger: + # - Check tensor shapes and values + # - Verify expected transformations + # - Look for unexpected changes +``` + +During the debugger session, you can: + +- Examine `intermediate[TransitionKey.OBSERVATION]` to see observation data +- Check `intermediate[TransitionKey.ACTION]` for action transformations +- Inspect any part of the transition to understand what each step does + +Step-through debugging is perfect for understanding the _data_ transformations, but what about the _structure_ of that data? While hooks and step-through help you debug runtime behavior, you also need to ensure your pipeline produces data in the format expected by downstream components. This is where feature contract validation comes in. + +## Validating Feature Contracts + +Feature contracts define what data structure your pipeline expects as input and produces as output. +Validating these contracts helps catch mismatches early. 
+ +### Understanding Feature Contracts + +Each processor step has a `transform_features()` method that describes how it changes the data structure: + +```python +# Get the expected output features from your pipeline +initial_features = { + PipelineFeatureType.OBSERVATION: { + "observation.state": PolicyFeature(type=FeatureType.STATE, shape=(7,)), + "observation.image": PolicyFeature(type=FeatureType.IMAGE, shape=(3, 224, 224)) + }, + PipelineFeatureType.ACTION: { + "action": PolicyFeature(type=FeatureType.ACTION, shape=(4,)) + } +} + +# Check what your pipeline will output +output_features = processor.transform_features(initial_features) + +print("Input features:") +for feature_type, features in initial_features.items(): + print(f" {feature_type}:") + for key, feature in features.items(): + print(f" {key}: {feature.type.value}, shape={feature.shape}") + +print("\nOutput features:") +for feature_type, features in output_features.items(): + print(f" {feature_type}:") + for key, feature in features.items(): + print(f" {key}: {feature.type.value}, shape={feature.shape}") +``` + +### Verifying Expected Features + +Check that your pipeline produces the features you expect: + +```python +# Define what features you expect the pipeline to produce +expected_keys = ["observation.state", "observation.image", "action"] + +print("Validating feature contract...") +for expected_key in expected_keys: + found = False + for feature_type, features in output_features.items(): + if expected_key in features: + feature = features[expected_key] + print(f"✅ {expected_key}: {feature.type.value}, shape={feature.shape}") + found = True + break + + if not found: + print(f"❌ Missing expected feature: {expected_key}") +``` + +This validation helps ensure your pipeline will work correctly with downstream components that expect specific data structures. + +## Summary + +Now that you understand the three debugging approaches, you can tackle any pipeline issue systematically: + +1. 
**Hooks** - For runtime monitoring and validation without modifying pipeline code +2. **Step-through** - For inspecting intermediate states and understanding transformations +3. **Feature validation** - For ensuring data structure contracts are met + +**When to use each approach:** + +- Start with **step-through debugging** when you need to understand what your pipeline does or when something unexpected happens +- Add **hooks** for continuous monitoring during development and production to catch issues automatically +- Use **feature validation** before deployment to ensure your pipeline works with downstream components + +These three tools work together to give you the complete observability that complex pipelines naturally lack. With hooks watching for issues, step-through helping you understand behavior, and feature validation ensuring compatibility, you'll be able to debug any pipeline confidently and efficiently. diff --git a/lerobot/docs/source/earthrover_mini_plus.mdx b/lerobot/docs/source/earthrover_mini_plus.mdx new file mode 100644 index 0000000000000000000000000000000000000000..884e84d8c05e94cc5d5acc952114ee63285dd8f8 --- /dev/null +++ b/lerobot/docs/source/earthrover_mini_plus.mdx @@ -0,0 +1,238 @@ +# EarthRover Mini Plus + +EarthRover Mini Plus + +The EarthRover Mini Plus is a fully open source mobile robot that connects through the cloud using the Frodobots SDK. This lets you control the robot and record datasets for training AI models. + +## What You Need + +### Hardware + +- EarthRover Mini robot +- Computer with Python 3.12 or newer +- Internet connection + +### Setting Up the Frodobots SDK + +The robot needs the [Frodobots SDK](https://github.com/frodobots-org/earth-rovers-sdk) running on your computer. Here's how: + +1. Download and install the SDK: + +```bash +git clone https://github.com/frodobots-org/earth-rovers-sdk.git +cd earth-rovers-sdk +pip install -r requirements.txt +``` + +2. 
Save Credentials: + +Write your .env variables with the SDK API key and bot name provided by the Frodobots team. + +```bash +SDK_API_TOKEN=your_sdk_api_token_here +BOT_SLUG=your_bot_slug_here +CHROME_EXECUTABLE_PATH=/path/to/chrome_or_chromium +# Default value is MAP_ZOOM_LEVEL=18 https://wiki.openstreetmap.org/wiki/Zoom_levels +MAP_ZOOM_LEVEL=18 +MISSION_SLUG=your_mission_slug_here +# Image quality between 0.1 and 1.0 (default: 0.8) +# Recommended: 0.8 for better performance +IMAGE_QUALITY=0.8 +# Image format: jpeg, png or webp (default: png) +# Recommended: jpeg for better performance and lower bandwidth usage +IMAGE_FORMAT=jpeg +``` + +3. Start the SDK: + +```bash +hypercorn main:app --reload +``` + +4. Open your web browser and go to `http://localhost:8000`, then click "Join" + +The SDK gives you: + +- Live video from front and rear cameras + +> [!IMPORTANT] +> The SDK must be running before you can use the robot. + +## Install LeRobot + +Follow our [Installation Guide](./installation) to install LeRobot. + +In addition to the base installation, install the EarthRover Mini dependencies: + +```bash +pip install -e . +``` + +## How It Works + +The robot uses the internet to communicate: + +- **Movement commands**: Sent through the SDK +- **Camera video**: Received from the SDK +- **Robot info**: Battery, location, speed from the SDK + +You don't need to plug anything in - it all works through the SDK. + +## Calibration + +No calibration needed! The robot is ready to use as soon as the SDK is running. + +## Controlling the Robot + +You control the robot using your keyboard - just like playing a video game with WASD keys. 
+ +### Keyboard Controls + +| Key | Action | +| --- | -------------------------------- | +| W | Move forward | +| S | Move backward | +| A | Turn left (with forward motion) | +| D | Turn right (with forward motion) | +| Q | Rotate left in place | +| E | Rotate right in place | +| X | Stop all movement | +| +/= | Increase speed | +| - | Decrease speed | +| ESC | Disconnect | + +### Speed Settings + +You can adjust how fast the robot moves: + +- **Forward/backward speed**: Default is full speed (1.0) +- **Turning speed**: Default is full speed (1.0) +- **Speed changes**: Use +/- keys to adjust by 0.1 each time + +### Try It Out + +Test driving the robot before recording data: + +```python +from lerobot.robots.earthrover_mini_plus import EarthRoverMiniPlus, EarthRoverMiniPlusConfig +from lerobot.teleoperators.keyboard import KeyboardRoverTeleop, KeyboardRoverTeleopConfig + +# Initialize robot +robot_config = EarthRoverMiniPlusConfig() +robot = EarthRoverMiniPlus(robot_config) + +# Initialize teleoperator +teleop_config = KeyboardRoverTeleopConfig( + linear_speed=1.0, + angular_speed=1.0, + speed_increment=0.1 +) +teleop = KeyboardRoverTeleop(teleop_config) + +# Connect +robot.connect() +teleop.connect() + +# Teleoperate (use keyboard controls) +try: + while True: + action = teleop.get_action() + robot.send_action(action) +except KeyboardInterrupt: + pass +finally: + robot.disconnect() + teleop.disconnect() +``` + +> [!TIP] +> If you're using a Mac, you might need to give Terminal permission to access your keyboard for teleoperation. Go to System Preferences > Security & Privacy > Input Monitoring and check the box for Terminal. + +## Recording Data + +Once you can drive the robot well, you can start recording data to train AI models. 
The system records: + +- **What you do**: How you move the robot (forward, backward, turning) +- **What the robot sees**: + - Videos from both cameras + - Robot speed and direction + - Battery level and location + - GPS position and signal + - Other sensor data +- **When it happened**: Timestamps for everything + +### Setting Up Hugging Face + +We use Hugging Face to store your data online. First, log in with your token from [Hugging Face settings](https://huggingface.co/settings/tokens): + +```bash +hf auth login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential +``` + +Store your Hugging Face username: + +```bash +HF_USER=$(hf auth whoami | awk -F': *' 'NR==1 {print $2}') +echo $HF_USER +``` + +### Start Recording + +Use the standard recording command: + +```bash +lerobot-record \ + --robot.type=earthrover_mini_plus \ + --teleop.type=keyboard_rover \ + --dataset.repo_id=your_username/dataset_name \ + --dataset.num_episodes=2 \ + --dataset.fps=10 \ + --dataset.single_task="Navigate around obstacles" \ + --dataset.streaming_encoding=true \ + --dataset.encoder_threads=2 \ + # --dataset.vcodec=auto \ + --display_data=true +``` + +Replace `your_username/dataset_name` with your Hugging Face username and a name for your dataset. 
+ +### What Gets Saved + +Your dataset includes: + +**Your Actions (2 features)**: + +- `linear_velocity`: How much you moved forward/backward +- `angular_velocity`: How much you turned left/right + +**Robot Observations (24 features)**: + +- Front camera video +- Rear camera video +- Current speed +- Battery level +- Orientation +- GPS (latitude, longitude, signal strength) +- Network signal strength +- Vibration level +- Lamp state (on/off) +- Accelerometer (x, y, z) +- Gyroscope (x, y, z) +- Magnetometer (x, y, z) +- Wheel RPMs (4 wheels) + +### Where Your Data Goes + +On your computer: `~/.cache/huggingface/lerobot/{repo-id}` + +After recording, your data automatically uploads to your Hugging Face page: + +```bash +echo https://huggingface.co/datasets/${HF_USER}/earthrover-navigation +``` + +Your dataset will be tagged with `LeRobot` for community discovery. diff --git a/lerobot/docs/source/env_processor.mdx b/lerobot/docs/source/env_processor.mdx new file mode 100644 index 0000000000000000000000000000000000000000..8dbf315c7a1ab4294a95990176275be647ed4a00 --- /dev/null +++ b/lerobot/docs/source/env_processor.mdx @@ -0,0 +1,418 @@ +# Environment Processors + +Environment processors are a critical layer in LeRobot's data processing architecture that handle **environment-specific** transformations, separate from policy-specific processing. This separation of concerns enables cleaner code, better modularity, and easier experimentation with different environments and policies. + +## Why Environment Processors? + +When working with different robot environments (LIBERO, MetaWorld, Aloha, etc.), each environment often has unique data formats, coordinate systems, and conventions that need standardization **before** policy processing. Without environment processors, these transformations would be: + +1. **Hardcoded in environment code** - Making it difficult to experiment with different state representations +2. 
**Duplicated across policies** - Each policy would need to handle environment-specific quirks +3. **Mixed with policy logic** - Violating separation of concerns and making debugging harder + +Environment processors solve this by providing a **dedicated processing layer** between raw environment observations and policy inputs. + +## The Processing Pipeline + +Here's how data flows through the complete processing pipeline during evaluation: + +```python +# In lerobot_eval.py rollout() function: + +# 1. Raw environment observation (numpy arrays, various formats) +raw_observation = env.step(action) + +# 2. Convert numpy to torch, normalize images [0,1] +observation = preprocess_observation(raw_observation) + +# 3. Add task metadata (for multi-task environments) +observation = add_envs_task(env, observation) + +# 4. ENVIRONMENT-SPECIFIC preprocessing (NEW!) +# - Flatten robot states +# - Rotate images to match dataset conventions +# - Handle environment-specific coordinate systems +observation = env_preprocessor(observation) + +# 5. POLICY-SPECIFIC preprocessing +# - Normalize with dataset statistics +# - Add batch dimensions +# - Move to GPU +# - Tokenize language instructions +observation = preprocessor(observation) + +# 6. Policy inference +action = policy.select_action(observation) + +# 7. POLICY-SPECIFIC postprocessing +# - Unnormalize actions +# - Remove batch dimensions +action = postprocessor(action) + +# 8. ENVIRONMENT-SPECIFIC postprocessing (NEW!) +# - Convert action formats if needed +# - Apply environment-specific constraints +action_transition = {"action": action} +action_transition = env_postprocessor(action_transition) +action = action_transition["action"] + +# 9. Execute in environment +env.step(action) +``` + +## The Benefits + +### 1. **Separation of Concerns** + +Environment processors handle transformations specific to the **environment's data format**, while policy processors handle transformations specific to the **model's requirements**. 
+ +```python +# ❌ Before: Mixed concerns +class LiberoVLAPolicy: + def preprocess(self, obs): + # Environment-specific: Flatten robot state (shouldn't be in policy!) + state = self._flatten_robot_state(obs["robot_state"]) + # Policy-specific: Normalize with dataset stats + state = self.normalizer(state) + return state + +# ✅ After: Clear separation +# Environment processor: Handles LIBERO's nested robot state +env_preprocessor = LiberoProcessorStep() # Flattens robot_state + +# Policy processor: Handles model requirements +policy_preprocessor = NormalizerProcessorStep(stats=dataset_stats) +``` + +### 2. **Flexibility and Reusability** + +The same policy can work with different environment processors, and the same environment processor can work with different policies: + +```python +# Use SmolVLA policy with LIBERO environment +libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(libero_cfg) +smolvla_preprocessor, smolvla_postprocessor = make_pre_post_processors(smolvla_cfg) + +# Or use ACT policy with the same LIBERO environment +libero_preprocessor, libero_postprocessor = make_env_pre_post_processors(libero_cfg) +act_preprocessor, act_postprocessor = make_pre_post_processors(act_cfg) +``` + +### 3. **Easier Experimentation** + +Want to try different state representations for LIBERO? 
Just create a new processor:

```python
# Original: 8D state (pos + quat→axisangle + gripper)
@ProcessorStepRegistry.register("libero_processor")
class LiberoProcessorStep(ObservationProcessorStep):
    def _process_observation(self, obs):
        robot_state = obs["observation.robot_state"]
        eef_pos = robot_state["eef"]["pos"]                        # 3D
        eef_axisangle = quat2axisangle(robot_state["eef"]["quat"]) # 3D
        gripper = robot_state["gripper"]["qpos"]                   # 2D
        state = torch.cat([eef_pos, eef_axisangle, gripper], dim=-1)  # 8D
        return state

# Experiment: Add velocity for better control
@ProcessorStepRegistry.register("libero_velocity_processor")
class LiberoVelocityProcessorStep(ObservationProcessorStep):
    def _process_observation(self, obs):
        # Include velocities for 14D state
        robot_state = obs["observation.robot_state"]
        eef_pos = robot_state["eef"]["pos"]                        # 3D
        eef_axisangle = quat2axisangle(robot_state["eef"]["quat"]) # 3D
        eef_vel = robot_state["eef"]["vel"]                        # 3D (NEW)
        gripper_pos = robot_state["gripper"]["qpos"]               # 2D
        gripper_vel = robot_state["gripper"]["qvel"]               # 3D (NEW)
        state = torch.cat([eef_pos, eef_axisangle, eef_vel,
                           gripper_pos, gripper_vel], dim=-1)      # 14D
        return state
```

### 4.
**Cleaner Environment Code** + +Environments expose **all available data** without needing to know what downstream models will use: + +```python +# LIBERO environment exposes full robot state +observation = { + "pixels": {"image": img, "image2": img2}, + "robot_state": { + "eef": {"pos": ..., "quat": ..., "vel": ..., "mat": ..., "axisangle": ...}, + "gripper": {"qpos": ..., "qvel": ...}, + "joints": {"pos": ..., "vel": ...} + } +} + +# Environment processor decides what to use +# Policy processor handles model-specific transformations +``` + +## Using Environment Processors + +### Factory Function + +The `make_env_pre_post_processors` function follows the same pattern as `make_pre_post_processors` for policies: + +```python +from lerobot.envs.factory import make_env_pre_post_processors +from lerobot.envs.configs import LiberoEnv, PushtEnv + +# For LIBERO: Returns LiberoProcessorStep in preprocessor +libero_cfg = LiberoEnv(task="libero_spatial", camera_name=["agentview"]) +env_preprocessor, env_postprocessor = make_env_pre_post_processors(libero_cfg) + +# For other environments: Returns identity processors (no-op) +pusht_cfg = PushtEnv() +env_preprocessor, env_postprocessor = make_env_pre_post_processors(pusht_cfg) +``` + +### Implementation in `envs/factory.py` + +```python +def make_env_pre_post_processors( + env_cfg: EnvConfig, +) -> tuple[ + PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], + PolicyProcessorPipeline[dict[str, Any], dict[str, Any]], +]: + """ + Create preprocessor and postprocessor pipelines for environment observations. + + Args: + env_cfg: The configuration of the environment. 
+ + Returns: + A tuple containing: + - preprocessor: Pipeline that processes environment observations + - postprocessor: Pipeline that processes environment outputs + """ + # For LIBERO environments, add the LiberoProcessorStep to preprocessor + if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type: + preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()]) + else: + # For all other environments, return an identity preprocessor + preprocessor = PolicyProcessorPipeline(steps=[]) + + # Postprocessor is currently identity for all environments + # Future: Could add environment-specific action transformations + postprocessor = PolicyProcessorPipeline(steps=[]) + + return preprocessor, postprocessor +``` + +### Integration in Evaluation + +In `lerobot_eval.py`, the environment processors are created once and used throughout: + +```python +def eval_main(cfg: EvalPipelineConfig): + # Create environment + envs = make_env(cfg.env, n_envs=cfg.eval.batch_size) + + # Create policy + policy = make_policy(cfg=cfg.policy, env_cfg=cfg.env) + + # Create policy processors + preprocessor, postprocessor = make_pre_post_processors( + policy_cfg=cfg.policy, + pretrained_path=cfg.policy.pretrained_path, + ) + + # Create environment processors (NEW!) 
+ env_preprocessor, env_postprocessor = make_env_pre_post_processors(env_cfg=cfg.env) + + # Run evaluation with both processor types + eval_policy_all( + envs=envs, + policy=policy, + env_preprocessor=env_preprocessor, # Environment-specific + env_postprocessor=env_postprocessor, # Environment-specific + preprocessor=preprocessor, # Policy-specific + postprocessor=postprocessor, # Policy-specific + n_episodes=cfg.eval.n_episodes, + ) +``` + +## Example: LIBERO Environment Processor + +The `LiberoProcessorStep` demonstrates a real-world environment processor: + +```python +from lerobot.processor.pipeline import ObservationProcessorStep + +@dataclass +@ProcessorStepRegistry.register(name="libero_processor") +class LiberoProcessorStep(ObservationProcessorStep): + """ + Processes LIBERO observations into the LeRobot format. + + **State Processing:** + - Extracts end-effector position (3D) + - Converts quaternion to axis-angle representation (3D) + - Extracts gripper joint positions (2D) + - Concatenates into 8D state vector + + **Image Processing:** + - Rotates images 180° to match HuggingFaceVLA/libero convention + """ + + def _process_observation(self, observation): + processed_obs = observation.copy() + + # Process images: Flip 180° for camera convention + for key in list(processed_obs.keys()): + if key.startswith("observation.images."): + img = processed_obs[key] + img = torch.flip(img, dims=[2, 3]) # Flip H and W + processed_obs[key] = img + + # Process robot_state: Flatten to 8D vector + if "observation.robot_state" in processed_obs: + robot_state = processed_obs.pop("observation.robot_state") + + eef_pos = robot_state["eef"]["pos"] # (B, 3) + eef_quat = robot_state["eef"]["quat"] # (B, 4) + gripper_qpos = robot_state["gripper"]["qpos"] # (B, 2) + + # Convert quaternion to axis-angle + eef_axisangle = self._quat2axisangle(eef_quat) # (B, 3) + + # Concatenate into single state vector + state = torch.cat((eef_pos, eef_axisangle, gripper_qpos), dim=-1) + state = 
state.float() + + processed_obs["observation.state"] = state + + return processed_obs +``` + +### Why These Transformations? + +1. **Image Rotation**: The HuggingFaceVLA/libero dataset has images rotated 180° from the raw LIBERO simulator. The processor handles this convention mismatch so policies trained on the dataset work seamlessly. + +2. **State Flattening**: The raw LIBERO environment exposes nested dictionaries with all available state information (position, quaternion, velocity, matrix representation, etc.). The processor: + - Selects the relevant components (pos, quat, gripper) + - Converts quaternion to axis-angle (more suitable for learning) + - Flattens to a single 8D vector that policies expect + +3. **Flexibility**: The environment still exposes **all** raw data. If you want to try different state representations (e.g., including velocities, using matrix representation instead of axis-angle), you can create a new processor without modifying the environment code. + +## Adding Environment Processors for New Environments + +To add environment processors for a new environment: + +### 1. Create the Processor Step + +```python +# In src/lerobot/processor/env_processor.py + +@dataclass +@ProcessorStepRegistry.register(name="myenv_processor") +class MyEnvProcessorStep(ObservationProcessorStep): + """Process observations from MyEnv.""" + + def _process_observation(self, observation): + processed = observation.copy() + + # Your environment-specific transformations + if "myenv.specific.state" in processed: + state = processed.pop("myenv.specific.state") + # Transform to standard format + processed["observation.state"] = self._transform_state(state) + + return processed +``` + +### 2. 
Update the Factory

```python
# In src/lerobot/envs/factory.py

def make_env_pre_post_processors(env_cfg: EnvConfig):
    if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
        preprocessor = PolicyProcessorPipeline(steps=[LiberoProcessorStep()])
    elif isinstance(env_cfg, MyEnvConfig) or "myenv" in env_cfg.type:
        preprocessor = PolicyProcessorPipeline(steps=[MyEnvProcessorStep()])
    else:
        preprocessor = PolicyProcessorPipeline(steps=[])

    postprocessor = PolicyProcessorPipeline(steps=[])
    return preprocessor, postprocessor
```

### 3. Use in Evaluation

No changes needed! The evaluation script automatically uses the appropriate processor (`--env.type=myenv` selects `MyEnvProcessorStep`):

```bash
lerobot-eval \
    --policy.path=lerobot/my_policy \
    --env.type=myenv \
    --eval.n_episodes=10
```

## Future: Environment Postprocessors

Currently, postprocessors are identity (no-op) for all environments. Future use cases include:

### Action Space Transformations

```python
@dataclass
class MyEnvActionPostprocessor(ProcessorStep):
    """Convert policy actions to environment-specific format."""

    def __call__(self, transition: EnvTransition) -> EnvTransition:
        action = transition["action"]

        # Example: Convert from Cartesian to joint space
        if self.action_space == "joint":
            action = self.ik_solver(action)

        # Example: Apply environment-specific safety limits
        action = torch.clamp(action, self.min_action, self.max_action)

        transition["action"] = action
        return transition
```

### Coordinate System Conversions

```python
@dataclass
class CoordinateTransformPostprocessor(ProcessorStep):
    """Transform actions between coordinate systems."""

    def __call__(self, transition: EnvTransition) -> EnvTransition:
        action = transition["action"]

        # Example: Policy outputs in world frame, env expects base frame
        action = self.world_to_base_transform(action)

        transition["action"] = action
        return transition
```

## Best
Practices + +1. **Keep environment processors simple**: They should only handle environment-specific data format issues, not complex learning-related transformations. + +2. **Use policy processors for model requirements**: Normalization, batching, device placement, and tokenization belong in policy processors. + +3. **Expose all data from environments**: Let processors decide what to use rather than hardcoding choices in the environment. + +4. **Document conventions**: Clearly document any coordinate system conventions, camera orientations, or data formats that your processor handles. + +5. **Test independently**: Environment processors should be testable without loading full policies or environments. + +## Summary + +Environment processors provide a **clean separation** between environment-specific data transformations and policy-specific model requirements. This architecture: + +- ✅ Enables easy experimentation with different state representations +- ✅ Allows policies to work seamlessly across different environments +- ✅ Keeps environment code focused on simulation/hardware interface +- ✅ Makes processor pipelines more maintainable and debuggable +- ✅ Follows the single responsibility principle + +The key insight: **Environments define data formats, processors standardize them, policies consume standardized data.** Each layer has a clear, focused responsibility. diff --git a/lerobot/docs/source/envhub.mdx b/lerobot/docs/source/envhub.mdx new file mode 100644 index 0000000000000000000000000000000000000000..36c08a8b32ae87e92f987b7976594a37cd0220a1 --- /dev/null +++ b/lerobot/docs/source/envhub.mdx @@ -0,0 +1,431 @@ +# Loading Environments from the Hub + +The **EnvHub** feature allows you to load simulation environments directly from the Hugging Face Hub with a single line of code. 
This unlocks a powerful new model for collaboration: instead of environments being locked away inside monolithic libraries, anyone can publish custom environments and share them with the community.

## What is EnvHub?

EnvHub lets you create custom robotics simulation environments with your own robot models and scenarios, and make them easily usable by anyone through the LeRobot framework.

EnvHub packages are stored on the Hugging Face Hub, and can be seamlessly pulled and used in your AI robotics projects through LeRobot with a single line of code.

Thanks to EnvHub, you can:

1. **Create and publish environments** to the Hugging Face Hub as Git repositories, and distribute complex physics simulations without packaging hassles
2. **Load environments** dynamically, without installing them as packages
3. **Version and track** environment changes using Git semantics
4. **Discover** new simulation tasks shared by the community

This design means you can go from discovering an interesting environment on the Hub to running experiments in seconds, or create your own custom robot and environment without worrying about dependency conflicts or complex installation procedures.

When you create an EnvHub package, you can build anything you want inside it and use any simulation tool you like: this is your own space to play with. The only requirement is that the package contains an `env.py` file that defines the environment and allows LeRobot to load and use your EnvHub package.

This `env.py` file needs to expose a small API so LeRobot can load and run it. In particular, you must provide a `make_env(n_envs: int = 1, use_async_envs: bool = False)` or `make_env(n_envs: int = 1, use_async_envs: bool = False, cfg: EnvConfig | None = None)` function, which is the main entry point for LeRobot.
It should return one of: + +- A `gym.vector.VectorEnv` (most common) +- A single `gym.Env` (will be automatically wrapped) +- A dict mapping `{suite_name: {task_id: VectorEnv}}` (for multi-task benchmarks) + +You can also pass an `EnvConfig` object to `make_env` to configure the environment (e.g. the number of environments, task, camera name, initial states, control mode, episode length, etc.). + +Finally, your environment must implement the standard `gym.vector.VectorEnv` interface so it works with LeRobot, including methods like `reset` and `step`. + +## Quick Start + +Loading an environment from the Hub is as simple as: + +```python +from lerobot.envs.factory import make_env + +# Load a hub environment (requires explicit consent to run remote code) +env = make_env("lerobot/cartpole-env", trust_remote_code=True) +``` + + + **Security Notice**: Loading environments from the Hub executes Python code + from third-party repositories. Only use `trust_remote_code=True` with + repositories you trust. We strongly recommend pinning to a specific commit + hash for reproducibility and security. + + +## Repository Structure + +To make your environment loadable from the Hub, your repository must contain at minimum: + +### Required Files + +**`env.py`** (or custom Python file) + +- Must expose a `make_env(n_envs: int, use_async_envs: bool)` function +- This function should return one of: + - A `gym.vector.VectorEnv` (most common) + - A single `gym.Env` (will be automatically wrapped) + - A dict mapping `{suite_name: {task_id: VectorEnv}}` (for multi-task benchmarks) + +### Optional Files + +**`requirements.txt`** + +- List any additional dependencies your environment needs +- Users will need to install these manually before loading your environment + +**`README.md`** + +- Document your environment: what task it implements, observation/action spaces, rewards, etc. 
+- Include usage examples and any special setup instructions + +**`.gitignore`** + +- Exclude unnecessary files from your repository + +### Example Repository Structure + +``` +my-environment-repo/ +├── env.py # Main environment definition (required) +├── requirements.txt # Dependencies (optional) +├── README.md # Documentation (recommended) +├── assets/ # Images, videos, etc. (optional) +│ └── demo.gif +└── configs/ # Config files if needed (optional) + └── task_config.yaml +``` + +## Creating Your Environment Repository + +### Step 1: Define Your Environment + +Create an `env.py` file with a `make_env` function: + +```python +# env.py +import gymnasium as gym + +def make_env(n_envs: int = 1, use_async_envs: bool = False): + """ + Create vectorized environments for your custom task. + + Args: + n_envs: Number of parallel environments + use_async_envs: Whether to use AsyncVectorEnv or SyncVectorEnv + + Returns: + gym.vector.VectorEnv or dict mapping suite names to vectorized envs + """ + def _make_single_env(): + # Create your custom environment + return gym.make("CartPole-v1") + + # Choose vector environment type + env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv + + # Create vectorized environment + vec_env = env_cls([_make_single_env for _ in range(n_envs)]) + + return vec_env +``` + +### Step 2: Test Locally + +Before uploading, test your environment locally: + +```python +from lerobot.envs.utils import _load_module_from_path, _call_make_env, _normalize_hub_result + +# Load your module +module = _load_module_from_path("./env.py") + +# Test the make_env function +result = _call_make_env(module, n_envs=2, use_async_envs=False) +normalized = _normalize_hub_result(result) + +# Verify it works +suite_name = next(iter(normalized)) +env = normalized[suite_name][0] +obs, info = env.reset() +print(f"Observation shape: {obs.shape if hasattr(obs, 'shape') else type(obs)}") +env.close() +``` + +### Step 3: Upload to the Hub + +Upload your 
repository to Hugging Face:

```bash
# Install huggingface_hub if needed
pip install huggingface_hub

# Login to Hugging Face
hf auth login

# Create a new repository
hf repo create my-org/my-custom-env

# Initialize git and push
git init
git add .
git commit -m "Initial environment implementation"
git remote add origin https://huggingface.co/my-org/my-custom-env
git push -u origin main
```

Alternatively, use the `huggingface_hub` Python API:

```python
from huggingface_hub import HfApi

api = HfApi()

# Create repository (a regular model repo, matching the git URL above)
api.create_repo("my-custom-env")

# Upload files
api.upload_folder(
    folder_path="./my-env-folder",
    repo_id="username/my-custom-env",
)
```

## Loading Environments from the Hub

### Basic Usage

```python
from lerobot.envs.factory import make_env

# Load from the hub
envs_dict = make_env(
    "username/my-custom-env",
    n_envs=4,
    trust_remote_code=True
)

# Access the environment
suite_name = next(iter(envs_dict))
env = envs_dict[suite_name][0]

# Use it like any gym environment
obs, info = env.reset()
action = env.action_space.sample()
obs, reward, terminated, truncated, info = env.step(action)
```

### Advanced: Pinning to Specific Versions

For reproducibility and security, pin to a specific Git revision:

```python
# Pin to a specific branch
env = make_env("username/my-env@main", trust_remote_code=True)

# Pin to a specific commit (recommended for papers/experiments)
env = make_env("username/my-env@abc123def456", trust_remote_code=True)

# Pin to a tag
env = make_env("username/my-env@v1.0.0", trust_remote_code=True)
```

### Custom File Paths

If your environment definition is not in `env.py`:

```python
# Load from a custom file
env = make_env("username/my-env:custom_env.py", trust_remote_code=True)

# Combine with version pinning
env = make_env("username/my-env@v1.0:envs/task_a.py", trust_remote_code=True)
```

###
Async Environments + +For better performance with multiple environments: + +```python +envs_dict = make_env( + "username/my-env", + n_envs=8, + use_async_envs=True, # Use AsyncVectorEnv for parallel execution + trust_remote_code=True +) +``` + +## URL Format Reference + +The hub URL format supports several patterns: + +| Pattern | Description | Example | +| -------------------- | ------------------------------ | -------------------------------------- | +| `user/repo` | Load `env.py` from main branch | `make_env("lerobot/pusht-env")` | +| `user/repo@revision` | Load from specific revision | `make_env("lerobot/pusht-env@main")` | +| `user/repo:path` | Load custom file | `make_env("lerobot/envs:pusht.py")` | +| `user/repo@rev:path` | Revision + custom file | `make_env("lerobot/envs@v1:pusht.py")` | + +## Multi-Task Environments + +For benchmarks with multiple tasks (like LIBERO), return a nested dictionary: + +```python +def make_env(n_envs: int = 1, use_async_envs: bool = False): + env_cls = gym.vector.AsyncVectorEnv if use_async_envs else gym.vector.SyncVectorEnv + + # Return dict: {suite_name: {task_id: VectorEnv}} + return { + "suite_1": { + 0: env_cls([lambda: gym.make("Task1-v0") for _ in range(n_envs)]), + 1: env_cls([lambda: gym.make("Task2-v0") for _ in range(n_envs)]), + }, + "suite_2": { + 0: env_cls([lambda: gym.make("Task3-v0") for _ in range(n_envs)]), + } + } +``` + +## Security Considerations + + + **Important**: The `trust_remote_code=True` flag is required to execute + environment code from the Hub. This is by design for security. + + +When loading environments from the Hub: + +1. **Review the code first**: Visit the repository and inspect `env.py` before loading +2. **Pin to commits**: Use specific commit hashes for reproducibility +3. **Check dependencies**: Review `requirements.txt` for suspicious packages +4. **Use trusted sources**: Prefer official organizations or well-known researchers +5. 
**Sandbox if needed**: Run untrusted code in isolated environments (containers, VMs) + +Example of safe usage: + +```python +# ❌ BAD: Loading without inspection +env = make_env("random-user/untrusted-env", trust_remote_code=True) + +# ✅ GOOD: Review code, then pin to specific commit +# 1. Visit https://huggingface.co/trusted-org/verified-env +# 2. Review the env.py file +# 3. Copy the commit hash +env = make_env("trusted-org/verified-env@a1b2c3d4", trust_remote_code=True) +``` + +## Example: CartPole from the Hub + +Here's a complete example using the reference CartPole environment: + +```python +from lerobot.envs.factory import make_env +import numpy as np + +# Load the environment +envs_dict = make_env("lerobot/cartpole-env", n_envs=4, trust_remote_code=True) + +# Get the vectorized environment +suite_name = next(iter(envs_dict)) +env = envs_dict[suite_name][0] + +# Run a simple episode +obs, info = env.reset() +done = np.zeros(env.num_envs, dtype=bool) +total_reward = np.zeros(env.num_envs) + +while not done.all(): + # Random policy + action = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(action) + total_reward += reward + done = terminated | truncated + +print(f"Average reward: {total_reward.mean():.2f}") +env.close() +``` + +## Benefits of EnvHub + +### For Environment Authors + +- **Easy distribution**: No PyPI packaging required +- **Version control**: Use Git for environment versioning +- **Rapid iteration**: Push updates instantly +- **Documentation**: Hub README renders beautifully +- **Community**: Reach LeRobot users directly + +### For Researchers + +- **Quick experiments**: Load any environment in one line +- **Reproducibility**: Pin to specific commits +- **Discovery**: Browse environments on the Hub +- **No conflicts**: No need to install conflicting packages + +### For the Community + +- **Growing ecosystem**: More diverse simulation tasks +- **Standardization**: Common `make_env` API +- **Collaboration**: Fork and 
improve existing environments +- **Accessibility**: Lower barrier to sharing research + +## Troubleshooting + +### "Refusing to execute remote code" + +You must explicitly pass `trust_remote_code=True`: + +```python +env = make_env("user/repo", trust_remote_code=True) +``` + +### "Module X not found" + +The hub environment has dependencies you need to install: + +```bash +# Check the repo's requirements.txt and install dependencies +pip install gymnasium numpy +``` + +### "make_env not found in module" + +Your `env.py` must expose a `make_env` function: + +```python +def make_env(n_envs: int, use_async_envs: bool): + # Your implementation + pass +``` + +### Environment returns wrong type + +The `make_env` function must return: + +- A `gym.vector.VectorEnv`, or +- A single `gym.Env`, or +- A dict `{suite_name: {task_id: VectorEnv}}` + +## Best Practices + +1. **Document your environment**: Include observation/action space descriptions, reward structure, and termination conditions in your README +2. **Add requirements.txt**: List all dependencies with versions +3. **Test thoroughly**: Verify your environment works locally before pushing +4. **Use semantic versioning**: Tag releases with version numbers +5. **Add examples**: Include usage examples in your README +6. **Keep it simple**: Minimize dependencies when possible +7. 
**License your work**: Add a LICENSE file to clarify usage terms + +## Future Directions + +The EnvHub ecosystem enables exciting possibilities: + +- **GPU-accelerated physics**: Share Isaac Gym or Brax environments +- **Photorealistic rendering**: Distribute environments with advanced graphics +- **Multi-agent scenarios**: Complex interaction tasks +- **Real-world simulators**: Digital twins of physical setups +- **Procedural generation**: Infinite task variations +- **Domain randomization**: Pre-configured DR pipelines + +As more researchers and developers contribute, the diversity and quality of available environments will grow, benefiting the entire robotics learning community. + +## See Also + +- [Hugging Face Hub Documentation](https://huggingface.co/docs/hub/en/index) +- [Gymnasium Documentation](https://gymnasium.farama.org/index.html) +- [Example Hub Environment](https://huggingface.co/lerobot/cartpole-env) diff --git a/lerobot/docs/source/envhub_isaaclab_arena.mdx b/lerobot/docs/source/envhub_isaaclab_arena.mdx new file mode 100644 index 0000000000000000000000000000000000000000..828d51bad30db3a263e6ca43098bcba64044cf20 --- /dev/null +++ b/lerobot/docs/source/envhub_isaaclab_arena.mdx @@ -0,0 +1,510 @@ +# NVIDIA IsaacLab Arena & LeRobot + +LeRobot EnvHub now supports **GPU-accelerated simulation** with IsaacLab Arena for policy evaluation at scale. +Train and evaluate imitation learning policies with high-fidelity simulation — all integrated into the LeRobot ecosystem. 
+ +IsaacLab Arena - GR1 Microwave Environment + +[IsaacLab Arena](https://github.com/isaac-sim/IsaacLab-Arena) integrates with NVIDIA IsaacLab to provide: + +- 🤖 **Humanoid embodiments**: GR1, G1, Galileo with various configurations +- 🎯 **Manipulation & loco-manipulation tasks**: Door opening, pick-and-place, button pressing, and more +- ⚡ **GPU-accelerated rollouts**: Parallel environment execution on NVIDIA GPUs +- 🖼️ **RTX Rendering**: Evaluate vision-based policies with realistic rendering, reflections and refractions +- 📦 **LeRobot-compatible datasets**: Ready for training with GR00T N1x, PI0, SmolVLA, ACT, and Diffusion policies +- 🔄 **EnvHub integration**: Load environments from HuggingFace EnvHub with one line + +## Installation + +### Prerequisites + +Hardware requirements are shared with Isaac Sim, and are detailed in [Isaac Sim Requirements](https://docs.isaacsim.omniverse.nvidia.com/5.1.0/installation/requirements.html). + +- NVIDIA GPU with CUDA support +- NVIDIA driver compatible with IsaacSim 5.1.0 +- Linux (Ubuntu 22.04 / 24.04) + +### Setup + +```bash +# 1. Create conda environment +conda create -y -n lerobot-arena python=3.11 +conda activate lerobot-arena +conda install -y -c conda-forge ffmpeg=7.1.1 + +# 2. Install Isaac Sim 5.1.0 +pip install "isaacsim[all,extscache]==5.1.0" --extra-index-url https://pypi.nvidia.com + +# Accept NVIDIA EULA (required) +export ACCEPT_EULA=Y +export PRIVACY_CONSENT=Y + +# 3. Install IsaacLab 2.3.0 +git clone https://github.com/isaac-sim/IsaacLab.git +cd IsaacLab +git checkout v2.3.0 +./isaaclab.sh -i +cd .. + +# 4. Install IsaacLab Arena +git clone https://github.com/isaac-sim/IsaacLab-Arena.git +cd IsaacLab-Arena +git checkout release/0.1.1 +pip install -e . +cd .. + + +# 5. Install LeRobot +git clone https://github.com/huggingface/lerobot.git +cd lerobot +pip install -e . +cd .. + + +# 6. 
Install additional dependencies +pip install onnxruntime==1.23.2 lightwheel-sdk==1.0.1 vuer[all]==0.0.70 qpsolvers==4.8.1 +pip install numpy==1.26.0 # Isaac Sim 5.1 depends on numpy==1.26.0, this will be fixed in next release +``` + +## Evaluating Policies + +### Pre-trained Policies + +The following trained policies are available: + +| Policy | Architecture | Task | Link | +| :-------------------------- | :----------- | :------------ | :----------------------------------------------------------------------- | +| pi05-arena-gr1-microwave | PI0.5 | GR1 Microwave | [HuggingFace](https://huggingface.co/nvidia/pi05-arena-gr1-microwave) | +| smolvla-arena-gr1-microwave | SmolVLA | GR1 Microwave | [HuggingFace](https://huggingface.co/nvidia/smolvla-arena-gr1-microwave) | + +### Evaluate SmolVLA + +```bash +pip install -e ".[smolvla]" +pip install numpy==1.26.0 # revert numpy to version 1.26 +``` + +```bash +lerobot-eval \ + --policy.path=nvidia/smolvla-arena-gr1-microwave \ + --env.type=isaaclab_arena \ + --env.hub_path=nvidia/isaaclab-arena-envs \ + --rename_map='{"observation.images.robot_pov_cam_rgb": "observation.images.robot_pov_cam"}' \ + --policy.device=cuda \ + --env.environment=gr1_microwave \ + --env.embodiment=gr1_pink \ + --env.object=mustard_bottle \ + --env.headless=false \ + --env.enable_cameras=true \ + --env.video=true \ + --env.video_length=10 \ + --env.video_interval=15 \ + --env.state_keys=robot_joint_pos \ + --env.camera_keys=robot_pov_cam_rgb \ + --trust_remote_code=True \ + --eval.batch_size=1 +``` + +### Evaluate PI0.5 + +```bash +pip install -e ".[pi]" +pip install numpy==1.26.0 # revert numpy to version 1.26 +``` + +PI0.5 requires disabling torch compile for evaluation: + +```bash +TORCH_COMPILE_DISABLE=1 TORCHINDUCTOR_DISABLE=1 lerobot-eval \ + --policy.path=nvidia/pi05-arena-gr1-microwave \ + --env.type=isaaclab_arena \ + --env.hub_path=nvidia/isaaclab-arena-envs \ + --rename_map='{"observation.images.robot_pov_cam_rgb": 
"observation.images.robot_pov_cam"}' \ + --policy.device=cuda \ + --env.environment=gr1_microwave \ + --env.embodiment=gr1_pink \ + --env.object=mustard_bottle \ + --env.headless=false \ + --env.enable_cameras=true \ + --env.video=true \ + --env.video_length=15 \ + --env.video_interval=15 \ + --env.state_keys=robot_joint_pos \ + --env.camera_keys=robot_pov_cam_rgb \ + --trust_remote_code=True \ + --eval.batch_size=1 +``` + + + To change the number of parallel environments, use the ```--eval.batch_size``` + flag. + + +### What to Expect + +During evaluation, you will see a progress bar showing the running success rate: + +``` +Stepping through eval batches: 8%|██████▍ | 4/50 [00:45<08:06, 10.58s/it, running_success_rate=25.0%] +``` + +### Video Recording + +To enable video recording during evaluation, add the following flags to your command: + +```bash +--env.video=true \ +--env.video_length=15 \ +--env.video_interval=15 +``` + +For more details on video recording, see the [IsaacLab Recording Documentation](https://isaac-sim.github.io/IsaacLab/main/source/how-to/record_video.html). 
+ + +When running headless with `--env.headless=true`, you must also enable cameras explicitly for camera enabled environments: + +```bash +--env.headless=true --env.enable_cameras=true +``` + + + +### Output Directory + +Evaluation videos are saved to the output directory with the following structure: + +``` +outputs/eval/<date>/<time>_<env_type>_<policy_name>/videos/<environment>_<batch_index>/eval_episode_<episode_index>.mp4 +``` + +For example: + +``` +outputs/eval/2026-01-02/14-38-01_isaaclab_arena_smolvla/videos/gr1_microwave_0/eval_episode_0.mp4 +``` + +## Training Policies + +To learn more about training policies with LeRobot, please refer to the training documentation: + +- [SmolVLA](./smolvla) +- [Pi0.5](./pi05) +- [GR00T N1.5](./groot) + +Sample IsaacLab Arena datasets are available on HuggingFace Hub for experimentation: + +| Dataset | Description | Frames | +| :-------------------------------------------------------------------------------------------------------- | :------------------------- | :----- | +| [Arena-GR1-Manipulation-Task](https://huggingface.co/datasets/nvidia/Arena-GR1-Manipulation-Task-v3) | GR1 microwave manipulation | ~4K | +| [Arena-G1-Loco-Manipulation-Task](https://huggingface.co/datasets/nvidia/Arena-G1-Loco-Manipulation-Task) | G1 loco-manipulation | ~4K | + +## Environment Configuration + +### Full Configuration Options + +```python +from lerobot.envs.configs import IsaaclabArenaEnv + +config = IsaaclabArenaEnv( + # Environment selection + environment="gr1_microwave", # Task environment + embodiment="gr1_pink", # Robot embodiment + object="power_drill", # Object to manipulate + + # Simulation settings + episode_length=300, # Max steps per episode + headless=True, # Run without GUI + device="cuda:0", # GPU device + seed=42, # Random seed + + # Observation configuration + state_keys="robot_joint_pos", # State observation keys (comma-separated) + camera_keys="robot_pov_cam_rgb", # Camera observation keys (comma-separated) + state_dim=54, # Expected state dimension + action_dim=36, # Expected action 
dimension + camera_height=512, # Camera image height + camera_width=512, # Camera image width + enable_cameras=True, # Enable camera observations + + # Video recording + video=False, # Enable video recording + video_length=100, # Frames per video + video_interval=200, # Steps between recordings + + # Advanced + mimic=False, # Enable mimic mode + teleop_device=None, # Teleoperation device + disable_fabric=False, # Disable fabric optimization + enable_pinocchio=True, # Enable Pinocchio for IK +) +``` + +### Using Environment Hub directly for advanced usage + +Create a file called `test_env_load_arena.py` or [download from the EnvHub](https://huggingface.co/nvidia/isaaclab-arena-envs/blob/main/tests/test_env_load_arena.py): + +```python +import logging +from dataclasses import asdict +from pprint import pformat +import torch +import tqdm +from lerobot.configs import parser +from lerobot.configs.eval import EvalPipelineConfig + + +@parser.wrap() +def main(cfg: EvalPipelineConfig): + """Run random action rollout for IsaacLab Arena environment.""" + logging.info(pformat(asdict(cfg))) + + from lerobot.envs.factory import make_env + + env_dict = make_env( + cfg.env, + n_envs=cfg.env.num_envs, + trust_remote_code=True, + ) + env = next(iter(env_dict.values()))[0] + env.reset() + for _ in tqdm.tqdm(range(cfg.env.episode_length)): + with torch.inference_mode(): + actions = env.action_space.sample() + obs, rewards, terminated, truncated, info = env.step(actions) + if terminated.any() or truncated.any(): + obs, info = env.reset() + env.close() + + +if __name__ == "__main__": + main() +``` + +Run with: + +```bash +python test_env_load_arena.py \ + --env.environment=g1_locomanip_pnp \ + --env.embodiment=gr1_pink \ + --env.object=cracker_box \ + --env.num_envs=4 \ + --env.enable_cameras=true \ + --env.seed=1000 \ + --env.video=true \ + --env.video_length=10 \ + --env.video_interval=15 \ + --env.headless=false \ + --env.hub_path=nvidia/isaaclab-arena-envs \ + 
--env.type=isaaclab_arena +``` + +## Creating New Environments + +First create a new IsaacLab Arena environment by following the [IsaacLab Arena Documentation](https://isaac-sim.github.io/IsaacLab-Arena/release/0.1.1/index.html). + +Clone our EnvHub repo: + +```bash +git clone https://huggingface.co/nvidia/isaaclab-arena-envs +``` + +Modify the `example_envs.yaml` file based on your new environment. +[Upload](./envhub#step-3-upload-to-the-hub) your modified repo to HuggingFace EnvHub. + + + Your IsaacLab Arena environment code must be locally available during + evaluation. Users can clone your environment repository separately, or you can + bundle the environment code and assets directly in your EnvHub repo. + + +Then, when evaluating, use your new environment: + +```bash +lerobot-eval \ + --env.hub_path=<your-username>/isaaclab-arena-envs \ + --env.environment=<your-environment-name> \ + ...other flags... +``` + +We look forward to your contributions! + +## Troubleshooting + +### CUDA out of memory + +Reduce `batch_size` or use a GPU with more VRAM: + +```bash +--eval.batch_size=1 +``` + +### EULA not accepted + +Set environment variables before running: + +```bash +export ACCEPT_EULA=Y +export PRIVACY_CONSENT=Y +``` + +### Video recording not working + +Enable cameras when running headless: + +```bash +--env.video=true --env.enable_cameras=true --env.headless=true +``` + +### Policy output dimension mismatch + +Ensure `action_dim` matches your policy: + +```bash +--env.action_dim=36 +``` + +### libGLU.so.1 Errors during Isaac Sim initialization + +Ensure you have the following dependencies installed; this issue is likely to occur on headless machines. 
+ +```bash +sudo apt update && sudo apt install -y libglu1-mesa libxt6 +``` + +## See Also + +- [EnvHub Documentation](./envhub.mdx) - General EnvHub usage +- [IsaacLab Arena GitHub](https://github.com/isaac-sim/IsaacLab-Arena) +- [IsaacLab Documentation](https://isaac-sim.github.io/IsaacLab/) + +## Lightwheel LW-BenchHub + +[Lightwheel](https://www.lightwheel.ai) is bringing `Lightwheel-Libero-Tasks` and `Lightwheel-RoboCasa-Tasks` with 268 tasks to the LeRobot ecosystem. +LW-BenchHub collects and generates large-scale datasets via teleoperation that comply with the LeRobot specification, enabling out-of-the-box training and evaluation workflows. +With the unified interface provided by EnvHub, developers can quickly build end-to-end experimental pipelines. + +### Install + +Assuming you followed the [Installation](#installation) steps, you can install LW-BenchHub with: + +```bash +conda install pinocchio -c conda-forge -y +pip install numpy==1.26.0 # revert numpy to version 1.26 + +sudo apt-get install git-lfs && git lfs install + +git clone https://github.com/LightwheelAI/lw_benchhub +cd lw_benchhub +git lfs pull # Ensure LFS files (e.g., .usd assets) are downloaded + +pip install -e . +``` + +For more detailed instructions, please refer to the [LW-BenchHub Documentation](https://docs.lightwheel.net/lw_benchhub/usage/Installation). 
+ +### Lightwheel Tasks Dataset + +LW-BenchHub datasets are available on HuggingFace Hub: + +| Dataset | Description | Tasks | Frames | +| :------------------------------------------------------------------------------------------------------------ | :---------------------- | :---- | :----- | +| [Lightwheel-Tasks-X7S](https://huggingface.co/datasets/LightwheelAI/Lightwheel-Tasks-X7S) | X7S LIBERO and RoboCasa | 117 | ~10.3M | +| [Lightwheel-Tasks-Double-Piper](https://huggingface.co/datasets/LightwheelAI/Lightwheel-Tasks-Double-Piper) | Double-Piper LIBERO | 130 | ~6.0M | +| [Lightwheel-Tasks-G1-Controller](https://huggingface.co/datasets/LightwheelAI/Lightwheel-Tasks-G1-Controller) | G1-Controller LIBERO | 62 | ~2.7M | +| [Lightwheel-Tasks-G1-WBC](https://huggingface.co/datasets/LightwheelAI/Lightwheel-Tasks-G1-WBC) | G1-WBC RoboCasa | 32 | ~1.5M | + +For training policies, refer to the [Training Policies](#training-policies) section. + +### Evaluating Policies + +#### Pre-trained Policies + +The following trained policies are available: + +| Policy | Architecture | Task | Layout | Robot | Link | +| :----------------------- | :----------- | :----------------------------- | :--------- | :-------------- | :------------------------------------------------------------------------------------ | +| smolvla-double-piper-pnp | SmolVLA | L90K1PutTheBlackBowlOnThePlate | libero-1-1 | DoublePiper-Abs | [HuggingFace](https://huggingface.co/LightwheelAI/smolvla-double-piper-pnp/tree/main) | + +#### Evaluate SmolVLA + +```bash +lerobot-eval \ + --policy.path=LightwheelAI/smolvla-double-piper-pnp \ + --env.type=isaaclab_arena \ + --rename_map='{"observation.images.left_hand_camera_rgb": "observation.images.left_hand", "observation.images.right_hand_camera_rgb": "observation.images.right_hand", "observation.images.first_person_camera_rgb": "observation.images.first_person"}' \ + --env.hub_path=LightwheelAI/lw_benchhub_env \ + --env.kwargs='{"config_path": 
"configs/envhub/example.yml"}' \ + --trust_remote_code=true \ + --env.state_keys=joint_pos \ + --env.action_dim=12 \ + --env.camera_keys=left_hand_camera_rgb,right_hand_camera_rgb,first_person_camera_rgb \ + --policy.device=cuda \ + --eval.batch_size=10 \ + --eval.n_episodes=100 +``` + +### Environment Configuration + +Evaluation can be quickly launched by modifying the `robot`, `task`, and `layout` settings in the configuration file. + +#### Full Configuration Options + +```yml +# ========================= +# Basic Settings +# ========================= +disable_fabric: false +device: cuda:0 +sensitivity: 1.0 +step_hz: 50 +enable_cameras: true +execute_mode: eval +episode_length_s: 20.0 # Episode length in seconds, increase if episodes timeout during eval + +# ========================= +# Robot Settings +# ========================= +robot: DoublePiper-Abs # Robot type, DoublePiper-Abs, X7S-Abs, G1-Controller or G1-Controller-DecoupledWBC +robot_scale: 1.0 + +# ========================= +# Task & Scene Settings +# ========================= +task: L90K1PutTheBlackBowlOnThePlate # Task name +scene_backend: robocasa +task_backend: robocasa +debug_assets: null +layout: libero-1-1 # Layout and style ID +sources: + - objaverse + - lightwheel + - aigen_objs +object_projects: [] +usd_simplify: false +seed: 42 + +# ========================= +# Object Placement Retry Settings +# ========================= +max_scene_retry: 4 +max_object_placement_retry: 3 + +resample_objects_placement_on_reset: true +resample_robot_placement_on_reset: true + +# ========================= +# Replay Configuration Settings +# ========================= +replay_cfgs: + add_camera_to_observation: true + render_resolution: [640, 480] +``` + +### See Also + +- [LW-BenchHub GitHub](https://github.com/LightwheelAI/LW-BenchHub) +- [LW-BenchHub Documentation](https://docs.lightwheel.net/lw_benchhub/) diff --git a/lerobot/docs/source/envhub_leisaac.mdx b/lerobot/docs/source/envhub_leisaac.mdx new file mode 
100644 index 0000000000000000000000000000000000000000..2537700a55a24fa365311ec51626783a375fa839 --- /dev/null +++ b/lerobot/docs/source/envhub_leisaac.mdx @@ -0,0 +1,302 @@ +# LeIsaac × LeRobot EnvHub + +LeRobot EnvHub now supports **imitation learning in simulation** with LeIsaac. +Spin up everyday manipulation tasks, teleoperate the robot, collect demos, push them to the Hub, and train policies in LeRobot — all in one loop. + +[LeIsaac](https://github.com/LightwheelAI/leisaac) integrates with IsaacLab and the SO101 Leader/Follower setup to provide: + +- 🕹️ **Teleoperation-first workflows** for data collection +- 📦 **Built-in data conversion** ready for LeRobot training +- 🤖 **Everyday skills** like picking oranges, lifting cubes, cleaning tables, and folding cloth +- ☁️ **Ongoing upgrades** from [LightWheel](https://lightwheel.ai/): cloud simulation, EnvHub support, Sim2Real tooling, and more + +Below you’ll find the currently supported LeIsaac tasks exposed through LeRobot EnvHub. + +# Available Environments + +The following table lists all available tasks and environments in LeIsaac x LeRobot Envhub. 
You can also get the latest list of environments by running the following command: + +```bash +python scripts/environments/list_envs.py +``` + +| Task | Environment ID | Task Description | Related Robot | +| :-------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------- | +| | [LeIsaac-SO101-PickOrange-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/pick_orange/pick_orange_env_cfg.py)

[LeIsaac-SO101-PickOrange-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/pick_orange/direct/pick_orange_env.py) | Pick three oranges and put them into the plate, then reset the arm to rest state. | Single-Arm SO101 Follower | +| | [LeIsaac-SO101-LiftCube-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/lift_cube/lift_cube_env_cfg.py)

[LeIsaac-SO101-LiftCube-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/lift_cube/direct/lift_cube_env.py) | Lift the red cube up. | Single-Arm SO101 Follower | +| | [LeIsaac-SO101-CleanToyTable-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/clean_toy_table_env_cfg.py)

[LeIsaac-SO101-CleanToyTable-BiArm-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/clean_toy_table_bi_arm_env_cfg.py)

[LeIsaac-SO101-CleanToyTable-BiArm-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/clean_toy_table/direct/clean_toy_table_bi_arm_env.py) | Pick two letter e objects into the box, and reset the arm to rest state. | Single-Arm SO101 Follower

Bi-Arm SO101 Follower | +| | [LeIsaac-SO101-FoldCloth-BiArm-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/fold_cloth/fold_cloth_bi_arm_env_cfg.py)

[LeIsaac-SO101-FoldCloth-BiArm-Direct-v0](https://github.com/LightwheelAI/leisaac/blob/main/source/leisaac/leisaac/tasks/fold_cloth/direct/fold_cloth_bi_arm_env.py) | Fold the cloth, and reset the arm to rest state.

_Note: Only the DirectEnv supports `check_success` in this task._ | Bi-Arm SO101 Follower | + +# Load LeIsaac directly in LeRobot with one line of code + +> EnvHub: Share LeIsaac environments through HuggingFace + +[EnvHub](https://huggingface.co/docs/lerobot/envhub) is our reproducible environment hub: spin up a packaged simulation with one line, experiment immediately, and publish your own tasks for the community. + +LeIsaac offers EnvHub support so you can consume or share tasks with only a few commands. + 