Spaces:

Zhaoting123
/

Hdf5_data_visualization

Sleeping

File size: 44,884 Bytes

4dfb808
e80f0b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7db5fc
e80f0b4
4dfb808
 
f7db5fc
4dfb808
 
 
 
 
 
e80f0b4
 
 
 
 
 
f7db5fc
 
 
 
 
4dfb808
 
d85b559
ccaa72d
d85b559
 
 
 
 
fdf3717
d85b559
77e7303
d85b559
77e7303
 
 
 
 
 
 
 
9c8b386
77e7303
 
fdf3717
d85b559
 
 
ccaa72d
f7db5fc
4dfb808
a54b7c3
2fb0eb7
 
f7db5fc
 
 
 
 
7927dd6
 
f7db5fc
7927dd6
 
4dfb808
d85b559
 
 
 
 
 
7927dd6
d85b559
 
e80f0b4
d85b559
 
 
fdf3717
 
 
 
 
 
 
 
 
 
 
 
 
 
d85b559
 
e80f0b4
4dfb808
f7db5fc
 
7927dd6
 
 
 
f7db5fc
 
d85b559
 
 
f7db5fc
 
e80f0b4
7927dd6
f7db5fc
 
7927dd6
 
 
 
e80f0b4
7927dd6
 
e80f0b4
7927dd6
f7db5fc
 
d85b559
 
7927dd6
f7db5fc
 
7927dd6
d85b559
 
7927dd6
f7db5fc
 
 
 
 
 
e80f0b4
f7db5fc
7927dd6
 
f7db5fc
 
 
 
e80f0b4
f7db5fc
 
7927dd6
 
f7db5fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7927dd6
 
f7db5fc
 
 
 
 
7927dd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7db5fc
 
 
 
 
 
d85b559
 
 
f7db5fc
 
 
 
 
7927dd6
f7db5fc
 
e80f0b4
7927dd6
f7db5fc
7927dd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7db5fc
 
 
e80f0b4
f7db5fc
 
 
7927dd6
 
e80f0b4
 
 
 
 
 
7927dd6
 
 
 
 
 
e80f0b4
 
 
 
 
 
 
 
 
 
 
f7db5fc
 
4dfb808
 
 
3cd8b50
 
 
 
 
 
 
4dfb808
3cd8b50
 
 
 
 
 
 
 
 
 
4dfb808
3cd8b50
4dfb808
 
 
7927dd6
3cd8b50
7927dd6
 
4dfb808
3cd8b50
 
 
 
 
7927dd6
 
 
 
 
3cd8b50
 
7927dd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7db5fc
4dfb808
 
f7db5fc
4dfb808
 
7927dd6
 
 
f7db5fc
 
4dfb808
7927dd6
4dfb808
e80f0b4
4dfb808
7927dd6
 
 
4dfb808
 
7927dd6
 
 
4dfb808
 
 
7927dd6
f7db5fc
 
7927dd6
f7db5fc
 
7927dd6
4dfb808
 
7927dd6
4dfb808
 
 
7927dd6
4dfb808
 
 
 
f7db5fc
4dfb808
7927dd6
4dfb808
 
 
 
 
7927dd6
4dfb808
 
7927dd6
4dfb808
 
f7db5fc
7927dd6
4dfb808
 
 
 
 
7927dd6
 
f7db5fc
7927dd6
 
 
 
4dfb808
 
7927dd6
4dfb808
 
 
 
 
 
 
 
7927dd6
4dfb808
f7db5fc
7927dd6
 
4dfb808
 
 
 
 
7927dd6
 
 
 
 
 
 
 
4dfb808
 
 
 
 
 
 
 
 
7927dd6
4dfb808
7927dd6
4dfb808
 
ca76c1e
e80f0b4
ca76c1e
 
 
 
 
 
 
 
 
 
 
e80f0b4
ca76c1e
 
 
 
 
 
 
 
cdbfe0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8b9e9d
cdbfe0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8b9e9d
 
cdbfe0c
 
 
 
 
 
 
b8b9e9d
 
 
cdbfe0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca76c1e
cdbfe0c
ca76c1e
 
cdbfe0c
ca76c1e
 
e80f0b4
ca76c1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80f0b4
cdbfe0c
ca76c1e
 
e80f0b4
 
ca76c1e
 
 
80f274e
 
 
 
 
b8b9e9d
 
 
 
80f274e
b8b9e9d
 
e80f0b4
80f274e
 
b8b9e9d
80f274e
 
 
 
b8b9e9d
 
 
80f274e
 
 
b8b9e9d
80f274e
 
 
b8b9e9d
80f274e
 
 
 
b8b9e9d
 
 
80f274e
 
 
 
 
 
 
 
 
b8b9e9d
 
 
 
 
 
 
 
 
 
 
80f274e
 
 
 
b8b9e9d
80f274e
 
 
 
 
 
e80f0b4
b8b9e9d
e80f0b4
 
 
 
 
 
 
 
 
 
16b8b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fb0eb7
16b8b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fb0eb7
 
16b8b5e
 
 
2fb0eb7
 
 
16b8b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a54b7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fb0eb7
e80f0b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a54b7c3
 
 
 
 
e80f0b4
 
 
 
a54b7c3
 
 
 
 
 
 
 
 
 
 
e80f0b4
 
a54b7c3
 
e80f0b4
a54b7c3
 
 
e80f0b4
 
 
 
 
 
 
 
 
 
a54b7c3
80f274e
e80f0b4
a54b7c3
e80f0b4
 
 
a54b7c3
 
e80f0b4
a54b7c3
 
80f274e
a54b7c3
e80f0b4
 
d85b559
 
7927dd6
 
 
 
d85b559
4dfb808
 
f7db5fc
4dfb808
7927dd6
f7db5fc
4dfb808
7927dd6
 
4dfb808
 
f7db5fc
7927dd6
 
4dfb808
 
 
7927dd6
 
 
 
 
 
 
 
 
fdf3717
 
7927dd6
 
e80f0b4
fdf3717
7927dd6
 
 
 
 
fdf3717
7927dd6
 
 
 
 
 
e80f0b4
fdf3717
7927dd6
 
 
 
 
 
fdf3717
7927dd6
 
 
d85b559
 
 
7927dd6
 
 
 
d85b559
7927dd6
 
4dfb808
7927dd6
 
4dfb808
 
 
e80f0b4
d85b559
 
f7db5fc
7927dd6
 
 
 
 
4dfb808
7927dd6
4dfb808
f7db5fc
 
 
4dfb808
 
 
 
 
 
7927dd6
ca76c1e
e80f0b4
ca76c1e
e80f0b4
ca76c1e
 
 
cdbfe0c
4dfb808
3cd8b50
 
 
 
 
 
 
 
4dfb808
7927dd6
 
 
 
 
 
 
 
 
 
cdbfe0c
 
 
4dfb808
7927dd6
 
3cd8b50
 
 
4dfb808
 
7927dd6
 
 
 
4dfb808
cdbfe0c
4dfb808
 
 
7927dd6
 
4dfb808
d85b559
7927dd6
 
4dfb808
7927dd6
4dfb808
7927dd6
 
fdf3717
4dfb808
f7db5fc
4dfb808
7927dd6
cdbfe0c
 
4dfb808
 
7927dd6
 
4dfb808
d85b559
 
 
 
 
 
e80f0b4
 
d85b559
e80f0b4
7927dd6
4dfb808
e80f0b4
 
4dfb808
 
e80f0b4
cdbfe0c
a54b7c3
fdf3717
4dfb808
ca76c1e
 
 
e80f0b4
 
2fb0eb7
ca76c1e
e80f0b4
4dfb808
16b8b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8b9e9d
3c79c68
 
 
 
 
d85b559
 
 
 
 
 
 
 
fdf3717
d85b559
 
e80f0b4
cdbfe0c
d85b559
 
 
 
 
fdf3717
d85b559
 
 
 
fdf3717
d85b559
4dfb808
 
 
d85b559
4dfb808
 
 
e80f0b4
cdbfe0c
d85b559
 
 
 
e80f0b4
cdbfe0c
4dfb808
 
a54b7c3
4dfb808
 
e80f0b4
cdbfe0c
4dfb808
 
 
 
e80f0b4
cdbfe0c
4dfb808
 
ca76c1e
 
e80f0b4
ca76c1e
 
 
e80f0b4
 
a54b7c3
e80f0b4
 
 
7927dd6
 
 
 
 
 
4dfb808
7927dd6
 
fdf3717
4dfb808
 
e80f0b4
cdbfe0c
4dfb808
 
 
 
 
 
cabf23d


# Standalone Hugging Face Space viewer for TrajectoryBuffer-style HDF5 files.
#
# requirements.txt:
# gradio
# huggingface_hub
# h5py
# numpy
# pillow
# matplotlib
# imageio
# imageio-ffmpeg
#
# Optional:
# opencv-python-headless

import os
import re
import tempfile
from functools import lru_cache

import gradio as gr
import h5py
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw

try:
    import imageio.v2 as imageio
except Exception:
    imageio = None

try:
    import cv2
except Exception:
    cv2 = None


# Built-in dataset presets, keyed by the preset name handled by
# resolve_dataset() and get_default_reverse_channels(). Each entry holds:
#   "repo_id"                  - Hugging Face Hub repository id
#   "filename"                 - path of the HDF5 file inside that repository
#   "default_reverse_channels" - default for reversing the image channel order
DATASET_PRESETS = {
    "Robosuite Square Correction": {
        "repo_id": "Zhaoting123/Robosuite_Square_image_abs_with_state",
        "filename": (
            "20260410_205606_Diffusion_CLIC_intervention_Circular_square_image_abs_"
            "Ta16_offlineFalse_Scale0.01/trajectory_buffer_0.hdf5"
        ),
        "default_reverse_channels": False,
    },
    "InsertT Demonstration": {
        "repo_id": "Zhaoting123/InsertT",
        "filename": "trajectory_buffer_Nov10_demo.hdf5",
        "default_reverse_channels": True,
    },
    "InsertT Correction": {
        "repo_id": "Zhaoting123/InsertT",
        "filename": "trajectory_buffer_Nov11_intervention.hdf5",
        "default_reverse_channels": True,
    },
    "RoundTable Correction": {
        "repo_id": "Zhaoting123/Furniture_Bench_Round_Table_Assembly",
        "filename": "trajectory_buffer_0_Nov24_intervention_relabeled.hdf5",
        "default_reverse_channels": True,
    },
}

# Preset used when no preset name is given or the given name is unknown.
DEFAULT_PRESET = "Robosuite Square Correction"
# repo_type argument passed to hf_hub_download().
REPO_TYPE = "dataset"
# NOTE(review): presumably initial UI values for the chunk length and the
# display scale; they are not referenced in the code visible here - confirm.
DEFAULT_CHUNK_LEN = 16
DEFAULT_DISPLAY_SCALE = 1
# Figure size (inches) and DPI of the static status plot rendered by
# get_video_status_plot_base() for video export.
VIDEO_STATUS_FIGSIZE = (6.0, 1.8)
VIDEO_STATUS_DPI = 120
# NOTE(review): looks like the observation keys to pre-select as images when
# present; the consumer is outside the code visible here - confirm.
PREFERRED_IMAGE_KEYS = [
    "image1",
    "image2",
    "agentview_image",
    "robot0_eye_in_hand_image",
    "front_image",
    "wrist_image",
]
# Substrings matched against the lower-cased observation key by
# _looks_like_image_array(); any hit marks the key as image-like.
IMAGE_KEY_HINTS = ["rgb", "image", "img", "camera", "cam"]


def resolve_dataset(preset_name, custom_repo_id=None, custom_filename=None):
    """Translate a preset selection into a ``(repo_id, filename)`` pair.

    Args:
        preset_name: Name of an entry in ``DATASET_PRESETS`` or the literal
            ``"Custom"``. A falsy value selects ``DEFAULT_PRESET``; an unknown
            name also falls back to the ``DEFAULT_PRESET`` entry.
        custom_repo_id: Repository id, only read in ``"Custom"`` mode.
        custom_filename: HDF5 file path, only read in ``"Custom"`` mode.

    Returns:
        Tuple ``(repo_id, filename)``.

    Raises:
        ValueError: In ``"Custom"`` mode when either value is empty after
            stripping whitespace.
    """
    selected = preset_name if preset_name else DEFAULT_PRESET

    if selected != "Custom":
        fallback_entry = DATASET_PRESETS[DEFAULT_PRESET]
        entry = DATASET_PRESETS.get(selected, fallback_entry)
        return entry["repo_id"], entry["filename"]

    # Custom mode: normalise both inputs to stripped strings before checking.
    repo_id, filename = (
        str(raw or "").strip() for raw in (custom_repo_id, custom_filename)
    )
    if not (repo_id and filename):
        raise ValueError("For Custom mode, provide both repo_id and HDF5 filename/path.")
    return repo_id, filename


def get_default_reverse_channels(preset_name):
    """Return the preset's default for the channel-order reversal option.

    The value comes from the ``"default_reverse_channels"`` field of the
    matching ``DATASET_PRESETS`` entry (``False`` when the field is absent).
    A falsy or unknown ``preset_name`` uses the ``DEFAULT_PRESET`` entry, and
    ``"Custom"`` always yields ``False`` so the user can decide manually.
    """
    selected = DEFAULT_PRESET if not preset_name else preset_name

    if selected != "Custom":
        fallback_entry = DATASET_PRESETS[DEFAULT_PRESET]
        entry = DATASET_PRESETS.get(selected, fallback_entry)
        return bool(entry.get("default_reverse_channels", False))

    return False


@lru_cache(maxsize=8)
def get_local_hdf5_path(repo_id, filename):
    """Return the local path that ``hf_hub_download`` reports for a Hub file.

    The download is requested with ``repo_type=REPO_TYPE``. Results are
    memoised for up to 8 distinct ``(repo_id, filename)`` pairs.
    """
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type=REPO_TYPE,
    )
    return local_path


def _natural_sort_key(name):
    """Build a sort key that orders names by their trailing integer.

    Names ending in digits map to ``(0, <trailing number>)`` and therefore
    sort numerically and ahead of all other names, which map to
    ``(1, <name as string>)`` and sort alphabetically.
    """
    text = str(name)
    trailing_digits = re.search(r"([0-9]+)$", text)
    if trailing_digits is None:
        return 1, text
    return 0, int(trailing_digits.group(1))


@lru_cache(maxsize=8)
def get_trajectory_keys(repo_id, filename):
    """List the HDF5 group paths that hold trajectories, in natural order.

    Three layouts are tried in order:
      1. root-level groups whose name starts with ``"episode_"``;
      2. every group inside a root-level ``"data"`` group, returned with a
         ``"data/"`` prefix;
      3. every root-level group.

    Returns:
        Tuple of group paths sorted with ``_natural_sort_key``; the result is
        memoised per ``(repo_id, filename)``.
    """
    def group_names(container):
        # Names of the direct children that are HDF5 groups.
        return [
            name for name in container.keys()
            if isinstance(container[name], h5py.Group)
        ]

    with h5py.File(get_local_hdf5_path(repo_id, filename), "r") as h5_file:
        episode_groups = [
            name for name in group_names(h5_file)
            if str(name).startswith("episode_")
        ]
        if episode_groups:
            return tuple(sorted(episode_groups, key=_natural_sort_key))

        if "data" in h5_file and isinstance(h5_file["data"], h5py.Group):
            ordered = sorted(group_names(h5_file["data"]), key=_natural_sort_key)
            return tuple("data/" + name for name in ordered)

        return tuple(sorted(group_names(h5_file), key=_natural_sort_key))


@lru_cache(maxsize=8)
def get_num_trajectories(repo_id, filename):
    """Return the number of trajectory groups found by ``get_trajectory_keys``."""
    trajectory_keys = get_trajectory_keys(repo_id, filename)
    return len(trajectory_keys)


def inspect_hdf5_tree(preset_name, custom_repo_id, custom_filename, max_lines=180):
    """Return a text listing of the groups and datasets in the selected file.

    Args:
        preset_name: Preset name or ``"Custom"`` (see ``resolve_dataset``).
        custom_repo_id: Repository id used in ``"Custom"`` mode.
        custom_filename: HDF5 file path used in ``"Custom"`` mode.
        max_lines: Maximum number of entries to list.

    Returns:
        One line per visited object (``GROUP   <name>`` or
        ``DATASET <name> shape=... dtype=...``), followed by ``"..."`` only
        when entries were actually left out, or ``"No HDF5 contents found."``
        when nothing was listed.
    """
    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
    path = get_local_hdf5_path(repo_id, filename)

    lines = []
    truncated = False

    def visitor(name, obj):
        nonlocal truncated
        if isinstance(obj, h5py.Dataset):
            line = "DATASET {} shape={} dtype={}".format(name, obj.shape, obj.dtype)
        elif isinstance(obj, h5py.Group):
            line = "GROUP   {}".format(name)
        else:
            return None

        if len(lines) >= max_lines:
            # A further entry exists beyond the limit: remember that the
            # listing is incomplete and stop walking. visititems() aborts as
            # soon as the callback returns something other than None.
            truncated = True
            return True

        lines.append(line)
        return None

    with h5py.File(path, "r") as f:
        f.visititems(visitor)

    if truncated:
        lines.append("...")
    return "\n".join(lines) if lines else "No HDF5 contents found."


def _read_dataset_value(dataset):
    """Read the full content of ``dataset`` via ``dataset[()]``.

    A ``bytes`` result is decoded as UTF-8 text; anything else is returned
    exactly as read.
    """
    raw = dataset[()]
    return raw.decode("utf-8") if isinstance(raw, bytes) else raw


def _read_group_recursive(group):
    """Load an HDF5 group into a nested ``dict``.

    Sub-groups become nested dicts and datasets are read with
    ``_read_dataset_value``; members of any other kind are skipped.
    """
    loaded = {}
    for name, node in group.items():
        if isinstance(node, h5py.Group):
            loaded[name] = _read_group_recursive(node)
        elif isinstance(node, h5py.Dataset):
            loaded[name] = _read_dataset_value(node)
    return loaded


def _find_first_key(mapping, candidate_keys):
    """Return the first entry of ``candidate_keys`` contained in ``mapping``.

    Candidates are tested in the given order; ``None`` is returned when none
    of them is present.
    """
    return next((name for name in candidate_keys if name in mapping), None)


def _infer_time_length(data):
    """Guess the number of timesteps stored in a loaded trajectory dict.

    The first of ``timesteps``, ``dones``, ``robot_actions``,
    ``teacher_actions`` and ``actions`` that is present with at least one
    dimension decides the length (its leading dimension). Otherwise the most
    common leading dimension among the entries of the ``observation`` (or,
    failing that, ``obs``) dict is used. If nothing can be inferred, 1 is
    returned.
    """
    def leading_dim(value):
        # Leading dimension of an array-like, or None for scalars.
        arr = np.asarray(value)
        return int(arr.shape[0]) if arr.ndim >= 1 else None

    for name in ("timesteps", "dones", "robot_actions", "teacher_actions", "actions"):
        if name not in data:
            continue
        length = leading_dim(data[name])
        if length is not None:
            return length

    # Only the first of the two names that holds a dict is considered.
    obs_group = None
    for name in ("observation", "obs"):
        candidate = data.get(name)
        if isinstance(candidate, dict):
            obs_group = candidate
            break

    if not obs_group:
        return 1

    lengths = [
        length
        for length in (leading_dim(value) for value in obs_group.values())
        if length is not None
    ]
    if not lengths:
        return 1

    # Majority vote over the leading dimensions.
    values, counts = np.unique(lengths, return_counts=True)
    return int(values[np.argmax(counts)])


def _slice_time(value, t, T):
    """Return element ``t`` of ``value`` when its leading dimension equals ``T``.

    Values whose first dimension differs from ``T`` (or that have no
    dimensions) are returned whole, converted to a NumPy array.
    """
    arr = np.asarray(value)
    has_time_axis = arr.ndim >= 1 and arr.shape[0] == T
    return arr[t] if has_time_axis else arr


@lru_cache(maxsize=64)
def load_traj(repo_id, filename, traj_id):
    """Load one trajectory group and unpack it into per-timestep dicts.

    ``traj_id`` indexes the tuple returned by ``get_trajectory_keys`` and is
    clamped to its valid range. The group is read completely, its length is
    inferred with ``_infer_time_length`` and every field is sliced per step
    with ``_slice_time``. Missing teacher/robot actions fall back to the
    generic action (or a single zero); missing flags fall back to ``False``
    and a missing stored timestep falls back to the step index.

    Returns:
        A list with one dict per timestep, holding the keys ``obs``,
        ``robot_action``, ``teacher_action``, ``done``, ``timestep``,
        ``no_robot_action``, ``no_teacher_action``, ``episode_id`` and
        ``if_success``. An empty list is returned when the file exposes no
        trajectory groups. The result is memoised.
    """
    traj_keys = get_trajectory_keys(repo_id, filename)
    if not traj_keys:
        return []

    clamped_id = int(np.clip(int(traj_id), 0, len(traj_keys) - 1))
    traj_key = traj_keys[clamped_id]

    with h5py.File(get_local_hdf5_path(repo_id, filename), "r") as h5_file:
        data = _read_group_recursive(h5_file[traj_key])

    T = _infer_time_length(data)

    # Observations live under "observation" or, failing that, "obs".
    obs_all = {}
    for group_name in ("observation", "obs"):
        if isinstance(data.get(group_name), dict):
            obs_all = data[group_name]
            break

    # Accepted spellings per logical field, most preferred first.
    aliases = {
        "action": ["actions", "action"],
        "teacher": ["teacher_actions", "teacher_action"],
        "robot": ["robot_actions", "robot_action"],
        "no_teacher": ["no_teacher_actions", "no_teacher_action"],
        "no_robot": ["no_robot_actions", "no_robot_action"],
        "done": ["dones", "done"],
        "timestep": ["timesteps", "timestep"],
        "success": ["if_success", "success", "successes"],
    }
    found = {field: _find_first_key(data, names) for field, names in aliases.items()}

    def value_at(field, t, fallback):
        # Value of a field at step t, or the fallback when the field is absent.
        key = found[field]
        return _slice_time(data[key], t, T) if key else fallback

    def first_as_bool(value):
        # Flags may be scalars or small arrays; use the first element.
        return bool(np.asarray(value).reshape(-1)[0])

    steps = []
    for t in range(T):
        if found["action"] is not None:
            default_action = _slice_time(data[found["action"]], t, T)
        else:
            default_action = np.zeros(1, dtype=np.float32)

        if found["timestep"] is not None:
            stored = _slice_time(data[found["timestep"]], t, T)
            timestep = int(np.asarray(stored).reshape(-1)[0])
        else:
            timestep = t

        steps.append({
            "obs": {name: _slice_time(value, t, T) for name, value in obs_all.items()},
            "robot_action": np.asarray(value_at("robot", t, default_action)),
            "teacher_action": np.asarray(value_at("teacher", t, default_action)),
            "done": first_as_bool(value_at("done", t, False)),
            "timestep": timestep,
            "no_robot_action": first_as_bool(value_at("no_robot", t, False)),
            "no_teacher_action": first_as_bool(value_at("no_teacher", t, False)),
            "episode_id": traj_key,
            "if_success": first_as_bool(value_at("success", t, False)),
        })

    return steps


def _extract_latest_obs_value(value):
    """Pick the newest entry of a stacked observation, if a stack is evident.

    - 4-D input with a leading dimension of 1-4 and a channel-sized (1, 3
      or 4) second or last dimension is treated as a frame stack
      (``[obs_T, C, H, W]`` or ``[obs_T, H, W, C]``): the last frame is
      returned.
    - 2-D input with a leading dimension of 1 or 2 is treated as a stack of
      vectors (``[obs_T, D]``): the last row is returned.
    - Everything else is returned unchanged. In particular ``[C, H, W]`` is
      never sliced, which would turn an RGB image into a single channel.
    """
    arr = np.asarray(value)
    channel_counts = (1, 3, 4)

    if arr.ndim == 4:
        small_stack = arr.shape[0] in (1, 2, 3, 4)
        has_channel_axis = (
            arr.shape[1] in channel_counts or arr.shape[-1] in channel_counts
        )
        return arr[-1] if small_stack and has_channel_axis else arr

    if arr.ndim == 2 and arr.shape[0] in (1, 2):
        return arr[-1]

    return arr


def _looks_like_image_array(key, value):
    """Heuristically decide whether an observation entry is an image.

    The entry counts as an image when its lower-cased key contains one of
    ``IMAGE_KEY_HINTS`` or when its shape is image-like: any 2-D array, a 3-D
    array with a channel-sized (1, 3 or 4) first or last dimension, or a 4-D
    array with a channel-sized second or last dimension. A clear leading
    stack axis on 4-D input is removed before the shape test.
    """
    arr = np.asarray(value)
    channel_counts = (1, 3, 4)

    lowered_key = str(key).lower()
    if any(hint in lowered_key for hint in IMAGE_KEY_HINTS):
        return True

    def has_channel_axis(shape):
        return shape[1] in channel_counts or shape[-1] in channel_counts

    # Drop an obvious frame-stack axis so the per-frame shape is inspected.
    if arr.ndim == 4 and arr.shape[0] in (1, 2, 3, 4) and has_channel_axis(arr.shape):
        arr = arr[-1]

    if arr.ndim == 2:
        return True
    if arr.ndim == 3:
        return arr.shape[-1] in channel_counts or arr.shape[0] in channel_counts
    if arr.ndim == 4:
        return has_channel_axis(arr.shape)
    return False


def _float_img_to_uint8(img):
    """Convert a non-uint8 image array to uint8 using value-range heuristics.

    The value range is measured over the finite pixels only:
      - within ``[-1.01, 1.01]``: treated as normalised data; a range with
        negative values is first mapped from ``[-1, 1]`` to ``[0, 1]``, then
        scaled to ``[0, 255]``;
      - maximum at most 255: clipped to ``[0, 255]``;
      - otherwise: min-max normalised to ``[0, 255]``.

    Non-finite pixels are replaced before scaling (NaN and ``-inf`` by the
    finite minimum, ``+inf`` by the finite maximum). This keeps a single
    invalid pixel from collapsing the whole image and keeps the final integer
    cast well defined. Input without any finite pixel, including empty input,
    yields an all-zero array of the same shape.

    Args:
        img: Array-like image of any numeric dtype.

    Returns:
        ``np.uint8`` array with the same shape as ``img``.
    """
    arr = np.asarray(img, dtype=np.float32)

    finite_mask = np.isfinite(arr)
    if not finite_mask.any():
        return np.zeros(arr.shape, dtype=np.uint8)

    if finite_mask.all():
        arr_min = float(arr.min())
        arr_max = float(arr.max())
    else:
        finite_values = arr[finite_mask]
        arr_min = float(finite_values.min())
        arr_max = float(finite_values.max())
        arr = np.nan_to_num(arr, nan=arr_min, posinf=arr_max, neginf=arr_min)

    if arr_min >= -1.01 and arr_max <= 1.01:
        if arr_min < 0.0:
            arr = (arr + 1.0) * 0.5
        arr = np.clip(arr, 0.0, 1.0) * 255.0
    elif arr_max <= 255.0:
        arr = np.clip(arr, 0.0, 255.0)
    else:
        # max() guards against a zero range.
        arr = 255.0 * (arr - arr_min) / max(arr_max - arr_min, 1e-8)

    return np.round(arr).astype(np.uint8)


def _extract_display_image(value, reverse_channels=False):
    """Turn an observation value into an ``H x W x C`` uint8 image.

    Steps: take the latest frame of a stack (``_extract_latest_obs_value``),
    expand 2-D input to three identical channels, move a channel-first axis
    (size 1, 3 or 4) to the end, replicate a single channel to three, drop
    the fourth channel of 4-channel input, and convert non-uint8 data with
    ``_float_img_to_uint8``.

    Args:
        value: Observation array (or array-like).
        reverse_channels: When true and the image has three channels, the
            channel order is reversed.

    Raises:
        ValueError: If the frame is not 3-D after the layout fixes.
    """
    frame = np.asarray(_extract_latest_obs_value(value))

    if frame.ndim == 2:
        frame = np.repeat(frame[..., None], 3, axis=-1)
    elif frame.ndim == 3 and frame.shape[0] in (1, 3, 4):
        # Channel-first layout -> channel-last.
        frame = np.transpose(frame, (1, 2, 0))

    if frame.ndim != 3:
        raise ValueError("Unsupported image shape: {}".format(frame.shape))

    channels = frame.shape[-1]
    if channels == 1:
        frame = np.repeat(frame, 3, axis=-1)
    elif channels == 4:
        frame = frame[..., :3]

    if frame.dtype == np.uint8:
        result = frame.copy()
    else:
        result = _float_img_to_uint8(frame)

    if reverse_channels and result.shape[-1] == 3:
        return result[..., ::-1]
    return result


def _resize_image_for_display(img, display_scale):
    """Scale an image by ``display_scale`` with nearest-neighbour sampling.

    A scale of exactly 1 returns ``img`` itself. Otherwise width and height
    are multiplied by the scale, rounded, and kept at a minimum of one pixel.
    OpenCV is used when it was importable, Pillow otherwise.
    """
    factor = float(display_scale)
    if factor == 1.0:
        return img

    height, width = img.shape[:2]
    target_w = max(1, int(round(width * factor)))
    target_h = max(1, int(round(height * factor)))
    target_size = (target_w, target_h)

    if cv2 is None:
        resized = Image.fromarray(img).resize(target_size, resample=Image.Resampling.NEAREST)
        return np.asarray(resized)

    return cv2.resize(img, target_size, interpolation=cv2.INTER_NEAREST)


def _extract_mixed_action_chunk(traj, start_idx, chunk_length):
    """Collect up to ``chunk_length`` actions starting at ``start_idx``.

    For each step the teacher action is taken unless the step's
    ``no_teacher_action`` flag is set, in which case the robot action is
    used. The window is cut off at the end of the trajectory.

    Returns:
        ``(chunk, sources)``: ``chunk`` is a float32 array with one flattened
        action per row, ``sources`` a string with ``"T"`` or ``"R"`` per row.
        ``(None, "")`` is returned when the window is empty.
    """
    first = int(start_idx)
    stop = min(len(traj), first + int(chunk_length))

    rows = []
    tags = []
    for step in (traj[i] for i in range(first, stop)):
        teacher_active = not bool(step.get("no_teacher_action", False))
        if teacher_active:
            field, tag = "teacher_action", "T"
        else:
            field, tag = "robot_action", "R"
        rows.append(np.asarray(step[field], dtype=np.float32).reshape(-1))
        tags.append(tag)

    if rows:
        return np.stack(rows, axis=0), "".join(tags)
    return None, ""


def _extract_robot_action_chunk(traj, start_idx, chunk_length):
    """Collect up to ``chunk_length`` robot actions starting at ``start_idx``.

    The window is cut off at the end of the trajectory. Returns a float32
    array with one flattened action per row, or ``None`` for an empty window.
    """
    first = int(start_idx)
    stop = min(len(traj), first + int(chunk_length))

    rows = [
        np.asarray(traj[i]["robot_action"], dtype=np.float32).reshape(-1)
        for i in range(first, stop)
    ]
    return np.stack(rows, axis=0) if rows else None


def _safe_array_str(value, precision=3, max_items=24):
    """Format a flattened array as text, showing at most ``max_items`` values.

    When values are left out, a ``" ... +N more"`` suffix states how many.
    """
    flat = np.asarray(value).reshape(-1)
    rendered = np.array2string(flat[:max_items], precision=precision, separator=", ")

    hidden = flat.size - max_items
    if hidden > 0:
        rendered += " ... +{} more".format(hidden)
    return rendered


def _make_action_chunk_plot(mixed_chunk, robot_chunk):
    """Render an action chunk as a line plot and return it as an RGB array.

    Up to 10 dimensions of ``mixed_chunk`` are drawn as solid lines. When
    ``robot_chunk`` is given, the same number of its dimensions (at most) are
    overlaid as dashed, semi-transparent lines. 1-D input is treated as a
    single action dimension.

    Returns:
        ``H x W x 3`` uint8 image, or ``None`` when ``mixed_chunk`` is ``None``.
    """
    if mixed_chunk is None:
        return None

    def as_columns(chunk):
        # Float32 view of the chunk with one column per action dimension.
        data = np.asarray(chunk, dtype=np.float32)
        return data[:, None] if data.ndim == 1 else data

    mixed = as_columns(mixed_chunk)

    fig, ax = plt.subplots(figsize=(7, 3.2), dpi=140)
    steps = np.arange(mixed.shape[0])
    shown_dims = min(mixed.shape[1], 10)

    for dim in range(shown_dims):
        ax.plot(steps, mixed[:, dim], label="mixed[{}]".format(dim))

    if robot_chunk is not None:
        robot = as_columns(robot_chunk)
        robot_style = {"linestyle": "--", "alpha": 0.55}
        for dim in range(min(robot.shape[1], shown_dims)):
            ax.plot(steps, robot[:, dim], label="robot[{}]".format(dim), **robot_style)

    ax.set_title("Action chunk")
    ax.set_xlabel("chunk step")
    ax.set_ylabel("action value")
    ax.grid(True, alpha=0.3)
    ax.legend(loc="upper right", fontsize=7, ncol=2)
    fig.tight_layout()

    # Rasterise the figure and keep only the RGB channels.
    fig.canvas.draw()
    pixels = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    plt.close(fig)
    return pixels


@lru_cache(maxsize=8192)
def get_cached_gallery_items(repo_id, filename, traj_id, timestep, image_keys_tuple, display_scale, reverse_channels):
    """Build the display images for one timestep of one trajectory.

    Args:
        repo_id: Hub repository id of the dataset.
        filename: HDF5 file path inside the repository.
        traj_id: Trajectory index passed to ``load_traj``.
        timestep: Step index; clamped to the trajectory length.
        image_keys_tuple: Observation keys to render. It must be hashable
            because the call is memoised.
        display_scale: Resize factor for ``_resize_image_for_display``.
        reverse_channels: Whether to reverse the channel order of the images.

    Returns:
        ``(gallery_items, warnings)``: ``gallery_items`` is a list of
        ``(image, key)`` pairs, ``warnings`` a tuple of messages for keys
        that are missing or could not be converted. An empty trajectory
        yields no items and a single warning.
    """
    traj = load_traj(repo_id, filename, int(traj_id))
    if not traj:
        # load_traj returns an empty list when no trajectory groups exist.
        # Clamping against len(traj) - 1 would then give -1 and indexing the
        # empty list would raise IndexError.
        return [], ("Trajectory is empty; no frames to display.",)

    timestep = int(np.clip(int(timestep), 0, len(traj) - 1))
    obs = traj[timestep].get("obs", {})

    gallery_items = []
    warnings = []
    for key in image_keys_tuple:
        if key not in obs:
            warnings.append("Missing image key: {}".format(key))
            continue
        try:
            img = _extract_display_image(obs[key], reverse_channels=bool(reverse_channels))
            img = _resize_image_for_display(img, float(display_scale))
            gallery_items.append((img, key))
        except Exception as exc:
            # Best effort per key: a broken image becomes a warning instead
            # of failing the whole gallery.
            warnings.append("{}: {}".format(key, exc))

    return gallery_items, tuple(warnings)


def _compute_valid_start_indices(traj, min_seq_len):
    """Find the timesteps from which a full teacher-labelled window starts.

    A timestep ``t`` is valid when the ``min_seq_len`` steps
    ``t .. t + min_seq_len - 1`` all exist and all have
    ``no_teacher_action == False``. ``min_seq_len`` values below 1 are
    treated as 1.

    Window sums are taken from a prefix sum, so the cost is linear in the
    trajectory length regardless of the window size.

    Args:
        traj: Sequence of step dicts; a missing ``no_teacher_action`` entry
            counts as ``False``.
        min_seq_len: Required window length.

    Returns:
        ``(no_teacher, valid_indices)``: ``no_teacher`` is an int32 array of
        0/1 flags, one per step; ``valid_indices`` is an ascending list of
        valid start indices.
    """
    no_teacher = np.asarray(
        [int(bool(step.get("no_teacher_action", False))) for step in traj],
        dtype=np.int32,
    )

    window = int(max(1, min_seq_len))
    if len(traj) < window:
        return no_teacher, []

    # prefix[i] is the number of flagged steps among the first i steps, so
    # prefix[t + window] - prefix[t] counts the flags inside window t.
    prefix = np.concatenate(([0], np.cumsum(no_teacher, dtype=np.int64)))
    flags_in_window = prefix[window:] - prefix[:-window]
    valid_indices = np.flatnonzero(flags_in_window == 0).tolist()

    return no_teacher, valid_indices


def _make_trajectory_status_plot(traj, timestep, min_seq_len):
    """Render the same high-level status figure as the local matplotlib tool.

    Shows:
      - orange no_teacher_action step plot
      - green triangles for algorithmic valid start points
      - black vertical cursor at current timestep

    Args:
        traj: Sequence of step dicts (reads ``no_teacher_action`` and
            ``timestep``).
        timestep: Index of the step to mark; clamped to the trajectory.
        min_seq_len: Window length passed to ``_compute_valid_start_indices``.

    Returns:
        ``(image, is_valid_start, num_valid_starts)`` where ``image`` is an
        RGB uint8 array. For an empty trajectory the result is
        ``(None, False, 0)``.
    """
    total_steps = len(traj)
    if total_steps == 0:
        return None, False, 0

    timestep = int(np.clip(int(timestep), 0, total_steps - 1))
    # Stored timestep of every step, falling back to the list index.
    timesteps = np.asarray(
        [int(np.asarray(step.get("timestep", idx)).reshape(-1)[0]) for idx, step in enumerate(traj)],
        dtype=np.int32,
    )
    no_teacher, valid_indices = _compute_valid_start_indices(traj, min_seq_len)
    is_valid_start = timestep in set(valid_indices)

    fig, ax = plt.subplots(figsize=(10.5, 2.8), dpi=170)

    ax.step(
        np.arange(total_steps),
        no_teacher,
        where="post",
        label="no_teacher_action",
        color="orange",
    )

    # Valid starts are drawn below the 0 level so they do not overlap the
    # flag curve.
    if valid_indices:
        ax.scatter(
            valid_indices,
            [-0.15] * len(valid_indices),
            color="green",
            marker="^",
            s=18,
            label="Valid Start (len >= {})".format(int(min_seq_len)),
        )

    ax.axvline(timestep, color="black", linestyle="-", alpha=0.85, linewidth=1.5)
    # max(..., 1) keeps the x-range non-degenerate for one-step trajectories.
    ax.set_xlim(0, max(total_steps - 1, 1))
    ax.set_ylim(-0.38, 1.1)
    ax.set_ylabel("Flag", fontsize=10)
    ax.set_xlabel("Timestep index", fontsize=10)
    ax.set_yticks([0, 1])
    ax.set_yticklabels(["False", "True"])
    ax.grid(True, axis="x", alpha=0.2)

    title = "no_teacher_action | step {} / {}".format(timestep, total_steps - 1)
    if is_valid_start:
        title += " | VALID START"
    ax.set_title(title, fontsize=11)
    ax.tick_params(axis="both", labelsize=9)
    ax.legend(loc="upper right", fontsize=9)

    # Add saved timestep annotation if the stored timestep is not the same as index.
    saved_timestep = int(timesteps[timestep]) if len(timesteps) else timestep
    if saved_timestep != timestep:
        ax.text(
            0.01,
            0.04,
            "saved timestep: {}".format(saved_timestep),
            transform=ax.transAxes,
            fontsize=8,
            va="bottom",
            ha="left",
        )

    # Rasterise the figure, keep the RGB channels, and release the figure.
    fig.tight_layout()
    fig.canvas.draw()
    rgba = np.asarray(fig.canvas.buffer_rgba())
    image = rgba[..., :3].copy()
    plt.close(fig)

    return image, bool(is_valid_start), len(valid_indices)


@lru_cache(maxsize=8192)
def get_cached_status_plot(repo_id, filename, traj_id, timestep, min_seq_len):
    """Memoised wrapper around ``_make_trajectory_status_plot``.

    Loads the trajectory with ``load_traj``, clamps ``timestep`` to its
    length and returns the plot tuple for that step.
    """
    steps = load_traj(repo_id, filename, int(traj_id))
    last_index = len(steps) - 1
    cursor = int(np.clip(int(timestep), 0, last_index))
    return _make_trajectory_status_plot(steps, cursor, int(min_seq_len))


def preload_current_trajectory(preset_name, custom_repo_id, custom_filename, traj_id, image_keys, chunk_len, display_scale, reverse_channels):
    """Warm the per-frame caches for every timestep of one trajectory.

    For each step the gallery images (``get_cached_gallery_items``) and the
    status plot (``get_cached_status_plot``, with ``chunk_len`` as window
    length) are computed so that later lookups hit the caches.

    Returns:
        A status message: either a short error text when nothing could be
        loaded, or a summary with the trajectory index, the number of frames
        and the image keys.
    """
    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)

    n_traj = get_num_trajectories(repo_id, filename)
    if n_traj == 0:
        return "No trajectories found."

    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
    traj = load_traj(repo_id, filename, traj_id)
    if not traj:
        return "Trajectory could not be loaded."

    # Normalise the key selection to a hashable tuple for the cached calls.
    if image_keys is None:
        image_keys_tuple = ()
    elif isinstance(image_keys, str):
        image_keys_tuple = (image_keys,)
    else:
        image_keys_tuple = tuple(image_keys)

    total = len(traj)
    for t in range(total):
        get_cached_gallery_items(
            repo_id,
            filename,
            traj_id,
            t,
            image_keys_tuple,
            float(display_scale),
            bool(reverse_channels),
        )
        get_cached_status_plot(repo_id, filename, traj_id, t, int(chunk_len))

    summary = [
        "Preloaded trajectory {}".format(traj_id),
        "Frames cached: {}".format(total),
        "Image keys: {}".format(", ".join(image_keys_tuple) if image_keys_tuple else "none"),
    ]
    return "\n".join(summary)


def _compose_video_frame(gallery_items, frame_label, status_plot=None):
    """Compose one video frame.

    Top: selected observation images.
    Bottom: trajectory-status plot with the moving timestep cursor.

    Important: do NOT downscale the status plot to the image width. The plot
    contains tick labels and a legend, so preserving its native width makes the
    generated MP4 much more readable.

    Args:
        gallery_items: Iterable of ``(image, label)`` pairs; when empty, a
            640x360 placeholder with a notice is used instead.
        frame_label: Text drawn above the image panels (not drawn on the
            placeholder).
        status_plot: Optional image array placed below the observations.

    Returns:
        RGB uint8 array whose width and height are padded up to multiples
        of 16.
    """
    small_text_y = 3

    if not gallery_items:
        obs_canvas = Image.new("RGB", (640, 360), color=(20, 20, 20))
        draw = ImageDraw.Draw(obs_canvas)
        draw.text((8, small_text_y), "No selected image keys", fill=(255, 255, 255))
    else:
        pil_images = []
        for img, label in gallery_items:
            pil_img = Image.fromarray(np.asarray(img, dtype=np.uint8)).convert("RGB")

            # Keep the image-key caption compact; large captions waste video space.
            label_h = 16
            panel = Image.new("RGB", (pil_img.width, pil_img.height + label_h), color=(0, 0, 0))
            panel.paste(pil_img, (0, label_h))
            draw = ImageDraw.Draw(panel)
            draw.text((4, small_text_y), str(label), fill=(220, 220, 220))
            pil_images.append(panel)

        # Panels are laid out left to right with a fixed gap; top_h reserves
        # a strip for the frame label.
        gap = 8
        top_h = 18
        width = sum(im.width for im in pil_images) + gap * max(len(pil_images) - 1, 0)
        height = max(im.height for im in pil_images) + top_h
        obs_canvas = Image.new("RGB", (width, height), color=(0, 0, 0))
        draw = ImageDraw.Draw(obs_canvas)

        # Compact frame label above the image panels.
        draw.text((6, small_text_y), frame_label, fill=(220, 220, 220))

        x = 0
        for im in pil_images:
            obs_canvas.paste(im, (x, top_h))
            x += im.width + gap

    if status_plot is not None:
        status_img = Image.fromarray(np.asarray(status_plot, dtype=np.uint8)).convert("RGB")

        # Preserve the status plot resolution. If needed, pad the observation
        # canvas to the same width and center it above the plot.
        final_w = max(obs_canvas.width, status_img.width)
        if obs_canvas.width < final_w:
            padded_obs = Image.new("RGB", (final_w, obs_canvas.height), color=(0, 0, 0))
            padded_obs.paste(obs_canvas, ((final_w - obs_canvas.width) // 2, 0))
            obs_canvas = padded_obs
        elif status_img.width < final_w:
            # The plot is the narrower part: center it on a white background.
            padded_status = Image.new("RGB", (final_w, status_img.height), color=(255, 255, 255))
            padded_status.paste(status_img, ((final_w - status_img.width) // 2, 0))
            status_img = padded_status

        # Stack observations and plot vertically with a small black gap.
        gap_h = 8
        canvas = Image.new(
            "RGB",
            (final_w, obs_canvas.height + gap_h + status_img.height),
            color=(0, 0, 0),
        )
        canvas.paste(obs_canvas, (0, 0))
        canvas.paste(status_img, (0, obs_canvas.height + gap_h))
    else:
        canvas = obs_canvas

    # Many MP4 encoders prefer dimensions divisible by 16.
    pad_w = int(np.ceil(canvas.width / 16.0) * 16)
    pad_h = int(np.ceil(canvas.height / 16.0) * 16)
    if pad_w != canvas.width or pad_h != canvas.height:
        # Padding is added on the right and bottom; content stays at (0, 0).
        padded = Image.new("RGB", (pad_w, pad_h), color=(0, 0, 0))
        padded.paste(canvas, (0, 0))
        canvas = padded

    return np.asarray(canvas)


@lru_cache(maxsize=128)
def get_video_status_plot_base(repo_id, filename, traj_id, valid_window_len):
    """Render the static part of the status plot once for video export.

    Matplotlib per frame is slow. This function draws no_teacher_action and
    valid-start markers once, records the axes pixel bounds, and returns a base
    image. The moving cursor is later drawn with PIL, which is much faster.

    Args:
        repo_id: Hub repository id of the dataset.
        filename: HDF5 file path inside the repository.
        traj_id: Trajectory index passed to ``load_traj``.
        valid_window_len: Window length for ``_compute_valid_start_indices``.

    Returns:
        ``(base, (x0, y0, x1, y1), total_steps)``: ``base`` is the RGB uint8
        plot image and the bounds are the axes rectangle in image pixel
        coordinates (top-left origin). For an empty trajectory the result is
        ``(None, (0, 0, 1, 1), 0)``. The returned array is shared through
        the cache, so callers should not modify it in place.
    """
    traj = load_traj(repo_id, filename, int(traj_id))
    total_steps = len(traj)
    if total_steps == 0:
        return None, (0, 0, 1, 1), 0

    no_teacher, valid_indices = _compute_valid_start_indices(traj, int(valid_window_len))

    fig, ax = plt.subplots(figsize=VIDEO_STATUS_FIGSIZE, dpi=VIDEO_STATUS_DPI)
    ax.step(
        np.arange(total_steps),
        no_teacher,
        where="post",
        label="no_teacher_action",
        color="orange",
    )

    if valid_indices:
        ax.scatter(
            valid_indices,
            [-0.15] * len(valid_indices),
            color="green",
            marker="^",
            s=18,
            label="Valid Start (len >= {})".format(int(valid_window_len)),
        )

    # The x-limits define the linear step -> pixel mapping that the cursor
    # drawing code reproduces from the returned bounds.
    ax.set_xlim(0, max(total_steps - 1, 1))
    ax.set_ylim(-0.38, 1.1)
    ax.set_ylabel("Flag", fontsize=8)
    ax.set_xlabel("Timestep index", fontsize=8)
    ax.set_yticks([0, 1])
    ax.set_yticklabels(["False", "True"])
    ax.grid(True, axis="x", alpha=0.2)
    ax.set_title("no_teacher_action and valid starts", fontsize=9)
    ax.tick_params(axis="both", labelsize=7)
    ax.legend(loc="upper right", fontsize=7)
    fig.tight_layout()
    # The figure must be drawn before the pixel buffer and the axes extent
    # are read.
    fig.canvas.draw()

    rgba = np.asarray(fig.canvas.buffer_rgba())
    base = rgba[..., :3].copy()

    bbox = ax.get_window_extent()
    height = base.shape[0]

    # Matplotlib bbox origin is bottom-left, image origin is top-left.
    x0 = int(round(bbox.x0))
    x1 = int(round(bbox.x1))
    y0 = int(round(height - bbox.y1))
    y1 = int(round(height - bbox.y0))

    plt.close(fig)
    return base, (x0, y0, x1, y1), total_steps


@lru_cache(maxsize=8192)
def get_cached_video_status_frame(repo_id, filename, traj_id, timestep, valid_window_len):
    """Return the static status plot with a cursor drawn at ``timestep``.

    The base image and its axes bounds come from
    ``get_video_status_plot_base``. The cursor position is interpolated
    linearly between the left and right axes bounds. A small ``step i/N``
    label is drawn in the top-left corner of the plot area.

    Returns:
        RGB uint8 array, or ``None`` when no base plot is available.
    """
    base, bounds, total_steps = get_video_status_plot_base(
        repo_id, filename, int(traj_id), int(valid_window_len)
    )
    if base is None:
        return None

    last_step = total_steps - 1
    timestep = int(np.clip(int(timestep), 0, max(last_step, 0)))
    left, top, right, bottom = bounds
    span = max(last_step, 1)
    cursor_x = int(round(left + (right - left) * float(timestep) / float(span)))

    frame = Image.fromarray(np.asarray(base, dtype=np.uint8)).convert("RGB")
    painter = ImageDraw.Draw(frame)

    # Vertical cursor across the full height of the axes.
    painter.line([(cursor_x, top), (cursor_x, bottom)], fill=(0, 0, 0), width=4)

    # White box with the step counter in the top-left corner of the axes.
    painter.rectangle((left + 4, top + 4, left + 118, top + 24), fill=(255, 255, 255))
    painter.text(
        (left + 8, top + 7),
        "step {}/{}".format(timestep, last_step),
        fill=(0, 0, 0),
    )

    return np.asarray(frame)

def _draw_status_cursor_on_base(base, bounds, total_steps, timestep):
    """Fast video status frame: copy one static Matplotlib image and draw cursor.

    This avoids calling the lru-cached per-timestep status frame function during
    video export. For long trajectories, caching thousands of status images can
    consume a lot of memory and still requires PIL conversion for every frame.
    """
    if base is None:
        return None

    total_steps = int(max(total_steps, 1))
    timestep = int(np.clip(int(timestep), 0, total_steps - 1))
    x0, y0, x1, y1 = [int(v) for v in bounds]
    denom = max(total_steps - 1, 1)
    x = int(round(x0 + (x1 - x0) * float(timestep) / float(denom)))

    img = np.asarray(base, dtype=np.uint8).copy()

    # Draw the vertical cursor directly with NumPy. This is much cheaper than
    # creating a Matplotlib plot for every frame.
    x_left = max(0, x - 2)
    x_right = min(img.shape[1], x + 2)
    y_top = max(0, y0)
    y_bottom = min(img.shape[0], y1)
    img[y_top:y_bottom, x_left:x_right, :] = 0

    # Small text label. PIL is used only for the label, not for the whole plot.
    pil_img = Image.fromarray(img).convert("RGB")
    draw = ImageDraw.Draw(pil_img)
    label = "step {}/{}".format(timestep, total_steps - 1)
    draw.rectangle((x0 + 4, y0 + 4, x0 + 126, y0 + 24), fill=(255, 255, 255))
    draw.text((x0 + 8, y0 + 7), label, fill=(0, 0, 0))
    return np.asarray(pil_img)


def _get_fast_video_writer(out_path, fps):
    """Open an imageio writer for ``out_path`` using x264's ultrafast preset.

    The encoder settings favour export speed for interactive Spaces use.
    """
    encoder_flags = [
        "-preset", "ultrafast",
        "-crf", "28",
        "-pix_fmt", "yuv420p",
        "-movflags", "+faststart",
    ]
    writer_options = {
        "fps": float(fps),
        "codec": "libx264",
        "macro_block_size": 16,
        "ffmpeg_params": encoder_flags,
    }
    return imageio.get_writer(out_path, **writer_options)


def build_current_trajectory_video(preset_name, custom_repo_id, custom_filename, traj_id, image_keys, display_scale, reverse_channels, fps, valid_window_len, video_stride=4):
    """Encode the selected trajectory as an MP4 in the temp directory.

    Every ``video_stride``-th timestep is rendered (the final timestep is
    always included). Each frame combines the camera gallery with the status
    plot, whose static part is rendered once and only the cursor is redrawn.

    Returns ``(video_path, status_text)``; ``video_path`` is ``None`` when the
    export cannot start (imageio missing, no trajectories, or load failure).
    """
    if imageio is None:
        return None, "Video export requires imageio and imageio-ffmpeg in requirements.txt."

    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
    n_traj = get_num_trajectories(repo_id, filename)
    if n_traj == 0:
        return None, "No trajectories found."

    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
    traj = load_traj(repo_id, filename, traj_id)
    if not traj:
        return None, "Trajectory could not be loaded."

    # Normalize the key selection to a hashable tuple for the cached extractor.
    if image_keys is None:
        image_keys_tuple = ()
    elif isinstance(image_keys, str):
        image_keys_tuple = (image_keys,)
    else:
        image_keys_tuple = tuple(image_keys)

    video_stride = int(max(1, int(video_stride)))
    last_index = len(traj) - 1
    frame_indices = list(range(0, len(traj), video_stride))
    if frame_indices and frame_indices[-1] != last_index:
        # Always finish on the real last timestep, even if the stride skips it.
        frame_indices.append(last_index)

    unsafe_chars = r"[^A-Za-z0-9_.-]+"
    safe_repo = re.sub(unsafe_chars, "_", repo_id)
    safe_file = re.sub(unsafe_chars, "_", filename)[-80:]
    video_name = "trajectory_{}_{}_traj{:04d}_fps{}_stride{}.mp4".format(
        safe_repo, safe_file, traj_id, int(fps), video_stride
    )
    out_path = os.path.join(tempfile.gettempdir(), video_name)

    # Static part of the status plot: rendered once, reused for every frame.
    status_base, status_bounds, total_steps = get_video_status_plot_base(
        repo_id, filename, traj_id, int(valid_window_len)
    )

    writer = _get_fast_video_writer(out_path, fps)
    written = 0
    try:
        for written, t in enumerate(frame_indices, start=1):
            # Camera images come from the shared cache; the status image is
            # drawn on the fly so no per-timestep status frames are cached.
            gallery_items, _warnings = get_cached_gallery_items(
                repo_id,
                filename,
                traj_id,
                t,
                image_keys_tuple,
                float(display_scale),
                bool(reverse_channels),
            )
            frame_label = "trajectory {} | frame {}/{}".format(traj_id, t, len(traj) - 1)
            cursor_plot = _draw_status_cursor_on_base(status_base, status_bounds, total_steps, t)
            writer.append_data(
                _compose_video_frame(gallery_items, frame_label, status_plot=cursor_plot)
            )
    finally:
        # Close the writer even when a frame fails.
        writer.close()

    approx_seconds = float(written) / float(max(float(fps), 1.0))
    status_lines = [
        "Built trajectory video with optimized encoder and status rendering",
        "Trajectory: {}".format(traj_id),
        "Original timesteps: {} | Written frames: {} | Stride: {}".format(len(traj), written, video_stride),
        "FPS: {} | Approx video duration: {:.1f}s".format(fps, approx_seconds),
        "Valid-window length: {}".format(int(valid_window_len)),
        "Speedups: x264 ultrafast preset; static status plot rendered once; cursor drawn with NumPy/PIL",
    ]
    return out_path, "\n".join(status_lines)

def get_available_image_keys(repo_id, filename, traj_id):
    """List observation keys of the trajectory's first step that look like images.

    Keys listed in ``PREFERRED_IMAGE_KEYS`` come first, in that list's order;
    remaining keys follow in observation order. Returns ``[]`` when the
    dataset has no trajectories or the trajectory cannot be loaded.
    """
    n_traj = get_num_trajectories(repo_id, filename)
    if n_traj == 0:
        return []

    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
    traj = load_traj(repo_id, filename, traj_id)
    if not traj:
        return []

    first_obs = traj[0].get("obs", {})
    detected = []
    for name, payload in first_obs.items():
        # Best effort: an entry that cannot be inspected is simply not offered.
        try:
            is_image = bool(_looks_like_image_array(name, payload))
        except Exception:
            continue
        if is_image:
            detected.append(name)

    preferred = [name for name in PREFERRED_IMAGE_KEYS if name in detected]
    remaining = [name for name in detected if name not in preferred]
    return preferred + remaining


def update_custom_visibility(preset_name):
    """Show the two custom dataset textboxes only for the "Custom" preset.

    Returns one ``gr.update`` per textbox (repo id, then filename).
    """
    show_custom = preset_name == "Custom"
    repo_box_update = gr.update(visible=show_custom)
    file_box_update = gr.update(visible=show_custom)
    return repo_box_update, file_box_update


def update_after_dataset_change(preset_name, custom_repo_id, custom_filename):
    """Reset the UI controls after the dataset selection changes.

    Returns a 5-tuple in the order the Gradio outputs are wired:
    trajectory-slider update, timestep-slider update, image-key checkbox
    update, dataset status text, reverse-channels checkbox update.

    Slider maxima are kept at 1 or above so the sliders never collapse to an
    empty range when there is a single (or no) trajectory/timestep.
    """
    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
    n_traj = get_num_trajectories(repo_id, filename)
    reverse_default = get_default_reverse_channels(preset_name)

    # The status text is the same for empty and non-empty datasets.
    status = "Loaded `{}` / `{}`".format(repo_id, filename)
    status += "\nDetected trajectories: {}".format(n_traj)
    status += "\nreverse_channels default: {}".format(int(reverse_default))

    if n_traj == 0:
        return (
            gr.update(maximum=1, value=0),
            gr.update(maximum=1, value=0),
            gr.update(choices=[], value=[]),
            status,
            gr.update(value=reverse_default),
        )

    keys = get_available_image_keys(repo_id, filename, 0)
    traj = load_traj(repo_id, filename, 0)
    # Other callers in this file treat a falsy load_traj result as a load
    # failure; do the same here so len() is never applied to a missing
    # trajectory inside a UI callback.
    n_steps = len(traj) if traj else 0

    return (
        gr.update(maximum=max(n_traj - 1, 1), value=0),
        gr.update(maximum=max(n_steps - 1, 1), value=0),
        gr.update(choices=keys, value=keys[:2]),
        status,
        gr.update(value=reverse_default),
    )


def update_after_traj_change(preset_name, custom_repo_id, custom_filename, traj_id):
    """Reset the timestep slider and image-key choices for a new trajectory.

    Returns ``(timestep_slider_update, image_keys_update)``. The slider
    maximum is kept at 1 or above so the range is never empty.
    """
    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
    n_traj = get_num_trajectories(repo_id, filename)
    if n_traj == 0:
        return gr.update(maximum=1, value=0), gr.update(choices=[], value=[])

    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
    traj = load_traj(repo_id, filename, traj_id)
    keys = get_available_image_keys(repo_id, filename, traj_id)

    # Other callers in this file treat a falsy load_traj result as a load
    # failure; do the same here so len() is never applied to a missing
    # trajectory inside a UI callback.
    n_steps = len(traj) if traj else 0

    return (
        gr.update(maximum=max(n_steps - 1, 1), value=0),
        gr.update(choices=keys, value=keys[:2]),
    )


def render_frame(preset_name, custom_repo_id, custom_filename, traj_id, timestep, image_keys, chunk_len, display_scale, reverse_channels):
    """Render one timestep of a trajectory for the viewer.

    Returns ``(gallery_items, status_plot, info_text)``. When the dataset has
    no trajectories or the trajectory cannot be loaded, the gallery is empty,
    the plot is ``None`` and ``info_text`` carries the reason.
    """
    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
    n_traj = get_num_trajectories(repo_id, filename)
    if n_traj == 0:
        return [], None, "No trajectory groups found. Open Debug: HDF5 tree."

    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
    traj = load_traj(repo_id, filename, traj_id)
    if not traj:
        return [], None, "Trajectory could not be loaded. Open Debug: HDF5 tree."

    last_step = len(traj) - 1
    timestep = int(np.clip(int(timestep), 0, last_step))
    chunk_len = int(chunk_len)
    display_scale = float(display_scale)

    # Accept None, a single key, or any iterable of keys.
    if image_keys is None:
        image_keys = []
    elif isinstance(image_keys, str):
        image_keys = [image_keys]

    step = traj[timestep]

    gallery_items, image_warnings = get_cached_gallery_items(
        repo_id,
        filename,
        traj_id,
        timestep,
        tuple(image_keys),
        display_scale,
        bool(reverse_channels),
    )
    image_warnings = list(image_warnings)

    status_plot, is_valid_start, num_valid_starts = get_cached_status_plot(
        repo_id, filename, traj_id, timestep, chunk_len
    )

    # Shape/dtype summary for each selected key present in this step.
    tensor_lines = []
    for key in image_keys:
        if key not in step.get("obs", {}):
            continue
        tensor = np.asarray(step["obs"][key])
        tensor_lines.append(
            "{} shape={} dtype={}".format(key, tuple(tensor.shape), tensor.dtype)
        )

    report = [
        "dataset: {} / {}".format(repo_id, filename),
        "detected trajectories: {}".format(n_traj),
        "trajectory: {}".format(traj_id),
        "episode_id: {}".format(step.get("episode_id", "")),
        "timestep: {} / {}".format(timestep, last_step),
        "saved timestep: {}".format(step.get("timestep", timestep)),
    ]
    # Boolean step flags are shown as 0/1.
    for flag in ("done", "if_success", "no_teacher_action", "no_robot_action"):
        report.append("{}: {}".format(flag, int(bool(step.get(flag, False)))))
    report.append("valid-window length: {}".format(chunk_len))
    report.append("valid_start: {}".format(int(bool(is_valid_start))))
    report.append("num_valid_starts: {}".format(num_valid_starts))
    report.append("")
    report.append("teacher_action: {}".format(_safe_array_str(step.get("teacher_action", []))))
    report.append("robot_action:   {}".format(_safe_array_str(step.get("robot_action", []))))
    report.append("")
    report.append("selected image tensors:")
    report.extend(tensor_lines)

    if image_warnings:
        report.append("")
        report.append("Image warnings:")
        report.extend(image_warnings)

    return gallery_items, status_plot, "\n".join(report)


def build_app():
    """Build the Gradio Blocks UI for the viewer and wire up all events.

    The default preset is probed once so the initial widgets have sensible
    ranges; a failure during that probe is shown as a startup warning instead
    of aborting. Returns the (not yet launched) ``gr.Blocks`` instance.
    """
    repo_id, filename = resolve_dataset(DEFAULT_PRESET)

    n_traj, first_keys, startup_warning = 0, [], ""
    try:
        n_traj = get_num_trajectories(repo_id, filename)
        first_keys = get_available_image_keys(repo_id, filename, 0) if n_traj else []
    except Exception as exc:
        # Fall back to an empty viewer and surface the error in the page.
        n_traj, first_keys, startup_warning = 0, [], repr(exc)

    default_status = "Loaded default dataset\nDetected trajectories: {}\nreverse_channels default: {}".format(
        n_traj,
        int(get_default_reverse_channels(DEFAULT_PRESET)),
    )

    with gr.Blocks(title="HDF5 Trajectory Viewer") as demo:
        gr.Markdown(
            "# HDF5 Trajectory Viewer\n\n"
            "Standalone viewer for TrajectoryBuffer-style HDF5 datasets on Hugging Face.\n\n"
            "The status plot matches the local labeling view: orange `no_teacher_action`, green valid-start markers, and a black timestep cursor."
        )

        if startup_warning:
            gr.Markdown("Startup warning: `{}`".format(startup_warning))

        # --- Dataset selection -------------------------------------------
        with gr.Row():
            preset = gr.Dropdown(
                label="Dataset preset",
                choices=list(DATASET_PRESETS.keys()) + ["Custom"],
                value=DEFAULT_PRESET,
            )
            custom_repo_id = gr.Textbox(
                label="Custom repo_id, e.g. Zhaoting123/InsertT",
                value="",
                visible=False,
            )
            custom_filename = gr.Textbox(
                label="Custom HDF5 path in repo",
                value="",
                visible=False,
            )

        dataset_status = gr.Textbox(
            label="Dataset status",
            lines=2,
            value=default_status,
            interactive=False,
        )

        # --- Navigation ---------------------------------------------------
        with gr.Row():
            traj_slider = gr.Slider(
                label="Trajectory index",
                minimum=0,
                maximum=max(n_traj - 1, 1),
                value=0,
                step=1,
            )
            timestep_slider = gr.Slider(
                label="Timestep",
                minimum=0,
                maximum=1,
                value=0,
                step=1,
            )

        # --- Rendering options --------------------------------------------
        with gr.Row():
            image_keys = gr.CheckboxGroup(
                label="Image keys",
                choices=first_keys,
                value=first_keys[:2],
            )
            chunk_len = gr.Slider(
                label="Valid-window length",
                minimum=1,
                maximum=64,
                value=DEFAULT_CHUNK_LEN,
                step=1,
            )
            display_scale = gr.State(value=DEFAULT_DISPLAY_SCALE)
            reverse_channels = gr.Checkbox(
                label="Reverse channels BGR↔RGB",
                value=get_default_reverse_channels(DEFAULT_PRESET),
            )

        # --- Actions ------------------------------------------------------
        with gr.Row():
            render_btn = gr.Button("Render frame", variant="primary")
            preload_btn = gr.Button("Preload current trajectory")
            video_btn = gr.Button("Build trajectory video")
            video_fps = gr.Slider(label="Video FPS", minimum=1, maximum=30, value=10, step=1)
            video_stride = gr.Slider(label="Video frame stride", minimum=1, maximum=10, value=4, step=1)

        preload_status = gr.Textbox(
            label="Preload / video status",
            lines=4,
            value="Not preloaded yet.",
            interactive=False,
        )

        # --- Outputs ------------------------------------------------------
        with gr.Row():
            with gr.Column(scale=3):
                gallery = gr.Gallery(
                    label="Camera images",
                    columns=2,
                    height=360,
                    object_fit="contain",
                )
            with gr.Column(scale=2):
                status_plot = gr.Image(
                    label="no_teacher_action + valid starts",
                    type="numpy",
                    height=360,
                )

        trajectory_video = gr.Video(label="Trajectory video: smooth browser-side playback")
        info = gr.Textbox(label="Frame info", lines=16)

        with gr.Accordion("Debug: HDF5 tree", open=False):
            inspect_btn = gr.Button("Inspect HDF5 structure")
            hdf5_tree = gr.Textbox(lines=24, label="HDF5 tree")

        # --- Event wiring -------------------------------------------------
        # Each factory returns fresh lists so no listener shares list objects.
        def dataset_refresh():
            """Listener kwargs: reload dataset-dependent controls."""
            return {
                "fn": update_after_dataset_change,
                "inputs": [preset, custom_repo_id, custom_filename],
                "outputs": [traj_slider, timestep_slider, image_keys, dataset_status, reverse_channels],
            }

        def frame_refresh():
            """Listener kwargs: re-render the current frame."""
            return {
                "fn": render_frame,
                "inputs": [
                    preset,
                    custom_repo_id,
                    custom_filename,
                    traj_slider,
                    timestep_slider,
                    image_keys,
                    chunk_len,
                    display_scale,
                    reverse_channels,
                ],
                "outputs": [gallery, status_plot, info],
            }

        # Preset change: toggle custom boxes, reload the dataset, then render.
        preset.change(
            fn=update_custom_visibility,
            inputs=preset,
            outputs=[custom_repo_id, custom_filename],
        ).then(**dataset_refresh()).then(**frame_refresh())

        # Submitting either custom textbox reloads the dataset controls.
        for custom_box in (custom_repo_id, custom_filename):
            custom_box.submit(**dataset_refresh())

        traj_slider.change(
            fn=update_after_traj_change,
            inputs=[preset, custom_repo_id, custom_filename, traj_slider],
            outputs=[timestep_slider, image_keys],
        ).then(**frame_refresh())

        # Render on slider release rather than on every intermediate value.
        timestep_slider.release(**frame_refresh())

        for option_widget in [image_keys, chunk_len, reverse_channels]:
            option_widget.change(**frame_refresh())

        render_btn.click(**frame_refresh())

        preload_btn.click(
            fn=preload_current_trajectory,
            inputs=[preset, custom_repo_id, custom_filename, traj_slider, image_keys, chunk_len, display_scale, reverse_channels],
            outputs=preload_status,
        )

        video_btn.click(
            fn=build_current_trajectory_video,
            inputs=[preset, custom_repo_id, custom_filename, traj_slider, image_keys, display_scale, reverse_channels, video_fps, chunk_len, video_stride],
            outputs=[trajectory_video, preload_status],
        )

        inspect_btn.click(
            fn=inspect_hdf5_tree,
            inputs=[preset, custom_repo_id, custom_filename],
            outputs=hdf5_tree,
        )

        # Initial page load: populate the controls, then render the first frame.
        demo.load(**dataset_refresh()).then(**frame_refresh())

    return demo


if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces. The
    # port comes from the PORT environment variable, defaulting to 7860.
    demo = build_app()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "share": False,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)