Spaces:

Zhaoting123
/

Hdf5_data_visualization

Sleeping

App Files Files Community

Hdf5_data_visualization / app.py

Zhaoting123

Update app.py

2fb0eb7 verified 16 days ago

raw

history blame contribute delete

44.9 kB


	# Standalone Hugging Face Space viewer for TrajectoryBuffer-style HDF5 files.
	#
	# requirements.txt:
	# gradio
	# huggingface_hub
	# h5py
	# numpy
	# pillow
	# matplotlib
	# imageio
	# imageio-ffmpeg
	#
	# Optional:
	# opencv-python-headless

	import os
	import re
	import tempfile
	from functools import lru_cache

	import gradio as gr
	import h5py
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import numpy as np
	from huggingface_hub import hf_hub_download
	from PIL import Image, ImageDraw

	try:
	import imageio.v2 as imageio
	except Exception:
	imageio = None

	try:
	import cv2
	except Exception:
	cv2 = None


	# Built-in dataset presets, keyed by the preset name.
	# Each entry provides:
	#   - "repo_id": repository passed to hf_hub_download,
	#   - "filename": path of the HDF5 file inside that repository,
	#   - "default_reverse_channels": default for the channel-order reversal
	#     (read by get_default_reverse_channels).
	DATASET_PRESETS = {
	"Robosuite Square Correction": {
	"repo_id": "Zhaoting123/Robosuite_Square_image_abs_with_state",
	"filename": (
	"20260410_205606_Diffusion_CLIC_intervention_Circular_square_image_abs_"
	"Ta16_offlineFalse_Scale0.01/trajectory_buffer_0.hdf5"
	),
	"default_reverse_channels": False,
	},
	"InsertT Demonstration": {
	"repo_id": "Zhaoting123/InsertT",
	"filename": "trajectory_buffer_Nov10_demo.hdf5",
	"default_reverse_channels": True,
	},
	"InsertT Correction": {
	"repo_id": "Zhaoting123/InsertT",
	"filename": "trajectory_buffer_Nov11_intervention.hdf5",
	"default_reverse_channels": True,
	},
	"RoundTable Correction": {
	"repo_id": "Zhaoting123/Furniture_Bench_Round_Table_Assembly",
	"filename": "trajectory_buffer_0_Nov24_intervention_relabeled.hdf5",
	"default_reverse_channels": True,
	},
	}

	# Preset used when no preset name is given or the name is not in DATASET_PRESETS.
	DEFAULT_PRESET = "Robosuite Square Correction"
	# Passed as repo_type to hf_hub_download.
	REPO_TYPE = "dataset"
	# NOTE(review): the next two constants are not referenced in the code visible
	# here; presumably they are UI defaults for the chunk-length and display-scale
	# controls -- confirm against the UI-building code.
	DEFAULT_CHUNK_LEN = 16
	DEFAULT_DISPLAY_SCALE = 1
	# Matplotlib figure size and DPI used by get_video_status_plot_base.
	VIDEO_STATUS_FIGSIZE = (6.0, 1.8)
	VIDEO_STATUS_DPI = 120
	# Observation keys that get_available_image_keys lists first, in this order.
	PREFERRED_IMAGE_KEYS = [
	"image1",
	"image2",
	"agentview_image",
	"robot0_eye_in_hand_image",
	"front_image",
	"wrist_image",
	]
	# Substrings matched against the lower-cased observation key by
	# _looks_like_image_array.
	IMAGE_KEY_HINTS = ["rgb", "image", "img", "camera", "cam"]


	def resolve_dataset(preset_name, custom_repo_id=None, custom_filename=None):
	    """Translate a preset selection into a ``(repo_id, filename)`` pair.

	    A falsy ``preset_name`` selects ``DEFAULT_PRESET``. The special name
	    ``"Custom"`` uses the stripped ``custom_repo_id`` / ``custom_filename``
	    instead of a preset; any other unknown name falls back to the default
	    preset entry.

	    Raises:
	        ValueError: in Custom mode when either custom value is empty.
	    """
	    selected = preset_name if preset_name else DEFAULT_PRESET

	    if selected != "Custom":
	        preset = DATASET_PRESETS.get(selected, DATASET_PRESETS[DEFAULT_PRESET])
	        return preset["repo_id"], preset["filename"]

	    # Custom mode: normalise None to "" and trim surrounding whitespace.
	    repo_id, filename = (
	        str(text or "").strip() for text in (custom_repo_id, custom_filename)
	    )
	    if not (repo_id and filename):
	        raise ValueError("For Custom mode, provide both repo_id and HDF5 filename/path.")
	    return repo_id, filename


	def get_default_reverse_channels(preset_name):
	    """Return the preset's default for reversing the image channel order.

	    The value comes from the preset's ``default_reverse_channels`` entry
	    (missing entries count as False). ``"Custom"`` always yields False so
	    the user decides manually; a falsy or unknown name uses the default
	    preset's setting.
	    """
	    selected = preset_name or DEFAULT_PRESET
	    if selected == "Custom":
	        return False

	    fallback = DATASET_PRESETS[DEFAULT_PRESET]
	    preset = DATASET_PRESETS.get(selected, fallback)
	    flag = preset.get("default_reverse_channels", False)
	    return bool(flag)


	@lru_cache(maxsize=8)
	def get_local_hdf5_path(repo_id, filename):
	    """Return the path hf_hub_download gives for ``filename`` in ``repo_id``.

	    Results are memoised per ``(repo_id, filename)`` pair (up to 8 entries).
	    """
	    download_kwargs = {
	        "repo_id": repo_id,
	        "filename": filename,
	        "repo_type": REPO_TYPE,
	    }
	    return hf_hub_download(**download_kwargs)


	def _natural_sort_key(name):
	match = re.search(r"([0-9]+)$", str(name))
	if match:
	return 0, int(match.group(1))
	return 1, str(name)


	@lru_cache(maxsize=8)
	def get_trajectory_keys(repo_id, filename):
	    """Return the HDF5 group paths that hold trajectories, naturally sorted.

	    Lookup order:
	      1. root-level groups whose name starts with ``episode_``;
	      2. otherwise the sub-groups of a root ``data`` group, returned with a
	         ``data/`` prefix;
	      3. otherwise every root-level group.

	    The result is a tuple and is memoised per ``(repo_id, filename)``.
	    """
	    def group_names(container):
	        # Names of the direct children of `container` that are HDF5 groups.
	        return [
	            name for name in container.keys()
	            if isinstance(container[name], h5py.Group)
	        ]

	    def ordered(names):
	        return sorted(names, key=_natural_sort_key)

	    with h5py.File(get_local_hdf5_path(repo_id, filename), "r") as h5_file:
	        root_groups = group_names(h5_file)

	        episodes = [name for name in root_groups if str(name).startswith("episode_")]
	        if episodes:
	            return tuple(ordered(episodes))

	        if "data" in h5_file and isinstance(h5_file["data"], h5py.Group):
	            nested = ordered(group_names(h5_file["data"]))
	            return tuple("data/" + name for name in nested)

	        return tuple(ordered(root_groups))


	@lru_cache(maxsize=8)
	def get_num_trajectories(repo_id, filename):
	    """Return how many trajectory groups get_trajectory_keys finds."""
	    trajectory_keys = get_trajectory_keys(repo_id, filename)
	    return len(trajectory_keys)


	def inspect_hdf5_tree(preset_name, custom_repo_id, custom_filename, max_lines=180):
	    """Return a text listing of the groups and datasets in the selected file.

	    Each dataset is listed with its shape and dtype, each group by name.
	    At most ``max_lines`` entries are listed; when more objects exist, the
	    walk stops early and a final ``...`` line marks the truncation.

	    Returns:
	        The newline-joined listing, or ``"No HDF5 contents found."`` when
	        the file has no objects.
	    """
	    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
	    path = get_local_hdf5_path(repo_id, filename)

	    lines = []
	    truncated = False

	    def visitor(name, obj):
	        nonlocal truncated
	        if len(lines) >= max_lines:
	            # Returning any non-None value makes visititems stop walking,
	            # so large files are not traversed past the line limit.
	            truncated = True
	            return True
	        if isinstance(obj, h5py.Dataset):
	            lines.append("DATASET {} shape={} dtype={}".format(name, obj.shape, obj.dtype))
	        elif isinstance(obj, h5py.Group):
	            lines.append("GROUP {}".format(name))
	        return None

	    with h5py.File(path, "r") as f:
	        f.visititems(visitor)

	    # Only mark truncation when at least one object was actually left out.
	    if truncated:
	        lines.append("...")
	    return "\n".join(lines) if lines else "No HDF5 contents found."


	def _read_dataset_value(dataset):
	value = dataset[()]
	if isinstance(value, bytes):
	return value.decode("utf-8")
	return value


	def _read_group_recursive(group):
	    """Load an HDF5 group into a nested dict.

	    Sub-groups become nested dicts and datasets become their values as read
	    by ``_read_dataset_value``. Other object types are skipped.
	    """
	    tree = {}
	    for name, child in group.items():
	        if isinstance(child, h5py.Group):
	            tree[name] = _read_group_recursive(child)
	        elif isinstance(child, h5py.Dataset):
	            tree[name] = _read_dataset_value(child)
	    return tree


	def _find_first_key(mapping, candidate_keys):
	for key in candidate_keys:
	if key in mapping:
	return key
	return None


	def _infer_time_length(data):
	for key in ["timesteps", "dones", "robot_actions", "teacher_actions", "actions"]:
	if key in data:
	arr = np.asarray(data[key])
	if arr.ndim >= 1:
	return int(arr.shape[0])

	obs_group = None
	if isinstance(data.get("observation"), dict):
	obs_group = data["observation"]
	elif isinstance(data.get("obs"), dict):
	obs_group = data["obs"]

	if obs_group:
	lengths = []
	for value in obs_group.values():
	arr = np.asarray(value)
	if arr.ndim >= 1:
	lengths.append(int(arr.shape[0]))
	if lengths:
	values, counts = np.unique(lengths, return_counts=True)
	return int(values[np.argmax(counts)])
	return 1


	def _slice_time(value, t, T):
	arr = np.asarray(value)
	if arr.ndim >= 1 and arr.shape[0] == T:
	return arr[t]
	return arr


	@lru_cache(maxsize=64)
	def load_traj(repo_id, filename, traj_id):
	    """Load one trajectory as a list of per-timestep dicts.

	    ``traj_id`` is clamped to the available trajectory range. Each step
	    dict holds ``obs``, ``robot_action``, ``teacher_action``, ``done``,
	    ``timestep``, ``no_robot_action``, ``no_teacher_action``,
	    ``episode_id`` and ``if_success``. Missing teacher/robot actions fall
	    back to the generic action (or a single zero), missing flags to False,
	    and a missing timestep to the step index.

	    Returns an empty list when the file has no trajectory groups. The
	    result is memoised, so callers share the returned list.
	    """
	    traj_keys = get_trajectory_keys(repo_id, filename)
	    if not traj_keys:
	        return []

	    index = int(np.clip(int(traj_id), 0, len(traj_keys) - 1))
	    traj_key = traj_keys[index]

	    with h5py.File(get_local_hdf5_path(repo_id, filename), "r") as h5_file:
	        data = _read_group_recursive(h5_file[traj_key])

	    T = _infer_time_length(data)

	    # Observations live under "observation" or, failing that, "obs".
	    obs_all = {}
	    for obs_name in ("observation", "obs"):
	        if isinstance(data.get(obs_name), dict):
	            obs_all = data[obs_name]
	            break

	    # Accepted spellings for each field, in priority order.
	    aliases = {
	        "action": ["actions", "action"],
	        "teacher": ["teacher_actions", "teacher_action"],
	        "robot": ["robot_actions", "robot_action"],
	        "no_teacher": ["no_teacher_actions", "no_teacher_action"],
	        "no_robot": ["no_robot_actions", "no_robot_action"],
	        "done": ["dones", "done"],
	        "timestep": ["timesteps", "timestep"],
	        "success": ["if_success", "success", "successes"],
	    }
	    found = {field: _find_first_key(data, names) for field, names in aliases.items()}

	    def value_at(field, t, fallback):
	        # Value of `field` at step t, or `fallback` when the field is absent.
	        name = found[field]
	        return _slice_time(data[name], t, T) if name else fallback

	    def first_as_bool(value):
	        return bool(np.asarray(value).reshape(-1)[0])

	    traj = []
	    for t in range(T):
	        default_action = value_at("action", t, np.zeros(1, dtype=np.float32))
	        teacher_action = value_at("teacher", t, default_action)
	        robot_action = value_at("robot", t, default_action)

	        timestep = t
	        if found["timestep"] is not None:
	            stored = _slice_time(data[found["timestep"]], t, T)
	            timestep = int(np.asarray(stored).reshape(-1)[0])

	        traj.append({
	            "obs": {name: _slice_time(value, t, T) for name, value in obs_all.items()},
	            "robot_action": np.asarray(robot_action),
	            "teacher_action": np.asarray(teacher_action),
	            "done": first_as_bool(value_at("done", t, False)),
	            "timestep": timestep,
	            "no_robot_action": first_as_bool(value_at("no_robot", t, False)),
	            "no_teacher_action": first_as_bool(value_at("no_teacher", t, False)),
	            "episode_id": traj_key,
	            "if_success": first_as_bool(value_at("success", t, False)),
	        })

	    return traj


	def _extract_latest_obs_value(value):
	"""Return the latest stacked observation only when there is a clear stack axis.

	Important:
	- [obs_T, C, H, W] or [obs_T, H, W, C] should become the latest frame.
	- [C, H, W] must NOT be sliced, otherwise an RGB image becomes one
	grayscale channel.
	"""
	arr = np.asarray(value)

	# Stacked image observations, e.g. [obs_T, C, H, W] or [obs_T, H, W, C].
	if arr.ndim == 4 and arr.shape[0] in (1, 2, 3, 4):
	channel_first = arr.shape[1] in (1, 3, 4)
	channel_last = arr.shape[-1] in (1, 3, 4)
	if channel_first or channel_last:
	return arr[-1]

	# Stacked vector observations, e.g. [obs_T, D]. Keep this for non-image obs.
	if arr.ndim == 2 and arr.shape[0] in (1, 2):
	return arr[-1]

	return arr


	def _looks_like_image_array(key, value):
	    """Heuristically decide whether an observation entry is an image.

	    True when the lower-cased key contains one of ``IMAGE_KEY_HINTS`` or
	    when the array shape is image-like: any 2-D array, or a 3-D / 4-D
	    array with a channel axis of size 1, 3 or 4. A clearly stacked 4-D
	    array has its stack axis removed before the shape test.
	    """
	    channel_sizes = (1, 3, 4)
	    arr = np.asarray(value)
	    lowered = str(key).lower()
	    name_matches = any(hint in lowered for hint in IMAGE_KEY_HINTS)

	    # Strip the stack axis only when it is unambiguous.
	    stacked_image = (
	        arr.ndim == 4
	        and arr.shape[0] in (1, 2, 3, 4)
	        and (arr.shape[1] in channel_sizes or arr.shape[-1] in channel_sizes)
	    )
	    if stacked_image:
	        arr = arr[-1]

	    if arr.ndim == 2:
	        shape_matches = True
	    elif arr.ndim == 3:
	        shape_matches = arr.shape[-1] in channel_sizes or arr.shape[0] in channel_sizes
	    elif arr.ndim == 4:
	        shape_matches = arr.shape[1] in channel_sizes or arr.shape[-1] in channel_sizes
	    else:
	        shape_matches = False

	    return name_matches or shape_matches


	def _float_img_to_uint8(img):
	arr = img.astype(np.float32)
	arr_min = float(np.nanmin(arr))
	arr_max = float(np.nanmax(arr))

	if arr_min >= -1.01 and arr_max <= 1.01:
	if arr_min < 0.0:
	arr = (arr + 1.0) * 0.5
	arr = np.clip(arr, 0.0, 1.0) * 255.0
	elif arr_max <= 255.0:
	arr = np.clip(arr, 0.0, 255.0)
	else:
	arr = 255.0 * (arr - arr_min) / max(arr_max - arr_min, 1e-8)

	return np.round(arr).astype(np.uint8)


	def _extract_display_image(value, reverse_channels=False):
	    """Turn an observation value into an H x W x 3 uint8 image.

	    The latest entry of a stacked observation is used. 2-D arrays are
	    replicated to three channels, channel-first arrays are transposed to
	    channel-last, single-channel images are replicated and a fourth
	    channel is dropped. Non-uint8 data is converted with
	    ``_float_img_to_uint8``. With ``reverse_channels`` the order of the
	    three channels is flipped.

	    Raises:
	        ValueError: when the value cannot be reduced to a 3-D image.
	    """
	    frame = np.asarray(_extract_latest_obs_value(value))

	    if frame.ndim == 2:
	        frame = np.repeat(frame[..., None], 3, axis=-1)
	    elif frame.ndim == 3 and frame.shape[0] in (1, 3, 4):
	        # Channel-first layout -> channel-last.
	        frame = np.transpose(frame, (1, 2, 0))

	    if frame.ndim != 3:
	        raise ValueError("Unsupported image shape: {}".format(frame.shape))

	    channels = frame.shape[-1]
	    if channels == 1:
	        frame = np.repeat(frame, 3, axis=-1)
	    elif channels == 4:
	        frame = frame[..., :3]

	    if frame.dtype == np.uint8:
	        out = frame.copy()
	    else:
	        out = _float_img_to_uint8(frame)

	    if reverse_channels and out.shape[-1] == 3:
	        out = out[..., ::-1]
	    return out


	def _resize_image_for_display(img, display_scale):
	scale = float(display_scale)
	if scale == 1.0:
	return img

	h, w = img.shape[:2]
	new_size = (max(1, int(round(w * scale))), max(1, int(round(h * scale))))

	if cv2 is not None:
	return cv2.resize(img, new_size, interpolation=cv2.INTER_NEAREST)

	pil_img = Image.fromarray(img)
	return np.asarray(pil_img.resize(new_size, resample=Image.Resampling.NEAREST))


	def _extract_mixed_action_chunk(traj, start_idx, chunk_length):
	chunk = []
	sources = []
	end_idx = min(len(traj), int(start_idx) + int(chunk_length))

	for idx in range(int(start_idx), end_idx):
	step = traj[idx]
	use_teacher = not bool(step.get("no_teacher_action", False))
	action = step["teacher_action"] if use_teacher else step["robot_action"]
	chunk.append(np.asarray(action, dtype=np.float32).reshape(-1))
	sources.append("T" if use_teacher else "R")

	if not chunk:
	return None, ""
	return np.stack(chunk, axis=0), "".join(sources)


	def _extract_robot_action_chunk(traj, start_idx, chunk_length):
	chunk = []
	end_idx = min(len(traj), int(start_idx) + int(chunk_length))

	for idx in range(int(start_idx), end_idx):
	step = traj[idx]
	chunk.append(np.asarray(step["robot_action"], dtype=np.float32).reshape(-1))

	if not chunk:
	return None
	return np.stack(chunk, axis=0)


	def _safe_array_str(value, precision=3, max_items=24):
	arr = np.asarray(value).reshape(-1)
	shown = arr[:max_items]
	text = np.array2string(shown, precision=precision, separator=", ")
	if arr.size > max_items:
	text += " ... +{} more".format(arr.size - max_items)
	return text


	def _make_action_chunk_plot(mixed_chunk, robot_chunk):
	if mixed_chunk is None:
	return None

	mixed_chunk = np.asarray(mixed_chunk, dtype=np.float32)
	if mixed_chunk.ndim == 1:
	mixed_chunk = mixed_chunk[:, None]

	fig, ax = plt.subplots(figsize=(7, 3.2), dpi=140)
	x = np.arange(mixed_chunk.shape[0])
	max_dims = min(mixed_chunk.shape[1], 10)

	for dim in range(max_dims):
	ax.plot(x, mixed_chunk[:, dim], label="mixed[{}]".format(dim))

	if robot_chunk is not None:
	robot_chunk = np.asarray(robot_chunk, dtype=np.float32)
	if robot_chunk.ndim == 1:
	robot_chunk = robot_chunk[:, None]
	for dim in range(min(robot_chunk.shape[1], max_dims)):
	ax.plot(
	x,
	robot_chunk[:, dim],
	linestyle="--",
	alpha=0.55,
	label="robot[{}]".format(dim),
	)

	ax.set_title("Action chunk")
	ax.set_xlabel("chunk step")
	ax.set_ylabel("action value")
	ax.grid(True, alpha=0.3)
	ax.legend(loc="upper right", fontsize=7, ncol=2)
	fig.tight_layout()
	fig.canvas.draw()
	rgba = np.asarray(fig.canvas.buffer_rgba())
	image = rgba[..., :3].copy()
	plt.close(fig)
	return image


	@lru_cache(maxsize=8192)
	def get_cached_gallery_items(repo_id, filename, traj_id, timestep, image_keys_tuple, display_scale, reverse_channels):
	    """Build the display images for one timestep of one trajectory.

	    ``timestep`` is clamped to the trajectory length. For every key in
	    ``image_keys_tuple`` the observation is converted to a display image
	    and resized; keys that are missing or fail to convert are reported as
	    warning strings instead.

	    Returns:
	        ``(gallery_items, warnings)``: a list of ``(image, key)`` pairs and
	        a tuple of warning messages. The result is memoised.
	    """
	    traj = load_traj(repo_id, filename, int(traj_id))
	    step_index = int(np.clip(int(timestep), 0, len(traj) - 1))
	    obs = traj[step_index].get("obs", {})

	    gallery_items, warnings = [], []
	    for key in image_keys_tuple:
	        if key in obs:
	            try:
	                picture = _extract_display_image(obs[key], reverse_channels=bool(reverse_channels))
	                picture = _resize_image_for_display(picture, float(display_scale))
	                gallery_items.append((picture, key))
	            except Exception as exc:
	                warnings.append("{}: {}".format(key, exc))
	        else:
	            warnings.append("Missing image key: {}".format(key))

	    return gallery_items, tuple(warnings)


	def _compute_valid_start_indices(traj, min_seq_len):
	"""Match the original local script's valid-start heuristic.

	A timestep is valid when the following min_seq_len steps all have
	no_teacher_action == False.
	"""
	total_steps = len(traj)
	min_seq_len = int(max(1, min_seq_len))
	no_teacher = np.asarray(
	[int(bool(step.get("no_teacher_action", False))) for step in traj],
	dtype=np.int32,
	)

	valid_indices = []
	max_start = total_steps - min_seq_len + 1
	for t in range(max(0, max_start)):
	if int(np.sum(no_teacher[t:t + min_seq_len])) == 0:
	valid_indices.append(t)

	return no_teacher, valid_indices


	def _make_trajectory_status_plot(traj, timestep, min_seq_len):
	"""Render the same high-level status figure as the local matplotlib tool.

	Shows:
	- orange no_teacher_action step plot
	- green triangles for algorithmic valid start points
	- black vertical cursor at current timestep
	"""
	total_steps = len(traj)
	if total_steps == 0:
	return None, False, 0

	timestep = int(np.clip(int(timestep), 0, total_steps - 1))
	timesteps = np.asarray(
	[int(np.asarray(step.get("timestep", idx)).reshape(-1)[0]) for idx, step in enumerate(traj)],
	dtype=np.int32,
	)
	no_teacher, valid_indices = _compute_valid_start_indices(traj, min_seq_len)
	is_valid_start = timestep in set(valid_indices)

	fig, ax = plt.subplots(figsize=(10.5, 2.8), dpi=170)

	ax.step(
	np.arange(total_steps),
	no_teacher,
	where="post",
	label="no_teacher_action",
	color="orange",
	)

	if valid_indices:
	ax.scatter(
	valid_indices,
	[-0.15] * len(valid_indices),
	color="green",
	marker="^",
	s=18,
	label="Valid Start (len >= {})".format(int(min_seq_len)),
	)

	ax.axvline(timestep, color="black", linestyle="-", alpha=0.85, linewidth=1.5)
	ax.set_xlim(0, max(total_steps - 1, 1))
	ax.set_ylim(-0.38, 1.1)
	ax.set_ylabel("Flag", fontsize=10)
	ax.set_xlabel("Timestep index", fontsize=10)
	ax.set_yticks([0, 1])
	ax.set_yticklabels(["False", "True"])
	ax.grid(True, axis="x", alpha=0.2)

	title = "no_teacher_action \| step {} / {}".format(timestep, total_steps - 1)
	if is_valid_start:
	title += " \| VALID START"
	ax.set_title(title, fontsize=11)
	ax.tick_params(axis="both", labelsize=9)
	ax.legend(loc="upper right", fontsize=9)

	# Add saved timestep annotation if the stored timestep is not the same as index.
	saved_timestep = int(timesteps[timestep]) if len(timesteps) else timestep
	if saved_timestep != timestep:
	ax.text(
	0.01,
	0.04,
	"saved timestep: {}".format(saved_timestep),
	transform=ax.transAxes,
	fontsize=8,
	va="bottom",
	ha="left",
	)

	fig.tight_layout()
	fig.canvas.draw()
	rgba = np.asarray(fig.canvas.buffer_rgba())
	image = rgba[..., :3].copy()
	plt.close(fig)

	return image, bool(is_valid_start), len(valid_indices)


	@lru_cache(maxsize=8192)
	def get_cached_status_plot(repo_id, filename, traj_id, timestep, min_seq_len):
	    """Memoised status plot for one timestep of one trajectory.

	    Returns whatever ``_make_trajectory_status_plot`` returns for the
	    clamped timestep.
	    """
	    traj = load_traj(repo_id, filename, int(traj_id))
	    step_index = int(np.clip(int(timestep), 0, len(traj) - 1))
	    window = int(min_seq_len)
	    return _make_trajectory_status_plot(traj, step_index, window)


	def preload_current_trajectory(preset_name, custom_repo_id, custom_filename, traj_id, image_keys, chunk_len, display_scale, reverse_channels):
	    """Warm the per-timestep caches for every step of one trajectory.

	    Calls the cached gallery and status-plot functions for each timestep
	    so later lookups hit the cache.

	    Returns:
	        A status message describing what was cached, or an error message
	        when no trajectory is available.
	    """
	    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
	    n_traj = get_num_trajectories(repo_id, filename)
	    if n_traj == 0:
	        return "No trajectories found."

	    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
	    traj = load_traj(repo_id, filename, traj_id)
	    if not traj:
	        return "Trajectory could not be loaded."

	    # Accept None, a single key, or a sequence of keys.
	    if image_keys is None:
	        selected_keys = ()
	    elif isinstance(image_keys, str):
	        selected_keys = (image_keys,)
	    else:
	        selected_keys = tuple(image_keys)

	    n_frames = len(traj)
	    for step_index in range(n_frames):
	        get_cached_gallery_items(
	            repo_id, filename, traj_id, step_index,
	            selected_keys, float(display_scale), bool(reverse_channels),
	        )
	        get_cached_status_plot(repo_id, filename, traj_id, step_index, int(chunk_len))

	    report = [
	        "Preloaded trajectory {}".format(traj_id),
	        "Frames cached: {}".format(n_frames),
	        "Image keys: {}".format(", ".join(selected_keys) if selected_keys else "none"),
	    ]
	    return "\n".join(report)


	def _compose_video_frame(gallery_items, frame_label, status_plot=None):
	    """Assemble one video frame as an RGB array.

	    Layout: the selected observation images side by side, each with a
	    small key caption, under a compact frame label; below them the status
	    plot when one is given. The status plot keeps its native resolution:
	    whichever of the two parts is narrower is centred on a padded strip
	    rather than the plot being scaled. The final frame is padded on the
	    right and bottom to dimensions divisible by 16.
	    """
	    text_y = 3
	    black = (0, 0, 0)
	    caption_color = (220, 220, 220)

	    def to_rgb(array_like):
	        return Image.fromarray(np.asarray(array_like, dtype=np.uint8)).convert("RGB")

	    def centered(image, width, fill):
	        # Centre `image` horizontally on a strip of `width`; no-op if it fits.
	        if image.width >= width:
	            return image
	        strip = Image.new("RGB", (width, image.height), color=fill)
	        strip.paste(image, ((width - image.width) // 2, 0))
	        return strip

	    if gallery_items:
	        caption_h = 16  # compact key caption above each image
	        gap = 8
	        header_h = 18

	        panels = []
	        for img, label in gallery_items:
	            picture = to_rgb(img)
	            panel = Image.new("RGB", (picture.width, picture.height + caption_h), color=black)
	            panel.paste(picture, (0, caption_h))
	            ImageDraw.Draw(panel).text((4, text_y), str(label), fill=caption_color)
	            panels.append(panel)

	        row_w = sum(panel.width for panel in panels) + gap * max(len(panels) - 1, 0)
	        row_h = max(panel.height for panel in panels) + header_h
	        obs_canvas = Image.new("RGB", (row_w, row_h), color=black)
	        ImageDraw.Draw(obs_canvas).text((6, text_y), frame_label, fill=caption_color)

	        offset = 0
	        for panel in panels:
	            obs_canvas.paste(panel, (offset, header_h))
	            offset += panel.width + gap
	    else:
	        obs_canvas = Image.new("RGB", (640, 360), color=(20, 20, 20))
	        ImageDraw.Draw(obs_canvas).text(
	            (8, text_y), "No selected image keys", fill=(255, 255, 255)
	        )

	    if status_plot is None:
	        canvas = obs_canvas
	    else:
	        status_img = to_rgb(status_plot)
	        final_w = max(obs_canvas.width, status_img.width)
	        obs_canvas = centered(obs_canvas, final_w, black)
	        status_img = centered(status_img, final_w, (255, 255, 255))

	        gap_h = 8
	        canvas = Image.new(
	            "RGB",
	            (final_w, obs_canvas.height + gap_h + status_img.height),
	            color=black,
	        )
	        canvas.paste(obs_canvas, (0, 0))
	        canvas.paste(status_img, (0, obs_canvas.height + gap_h))

	    # Round both dimensions up to the next multiple of 16 for the encoder.
	    pad_w = -(-canvas.width // 16) * 16
	    pad_h = -(-canvas.height // 16) * 16
	    if (pad_w, pad_h) != (canvas.width, canvas.height):
	        padded = Image.new("RGB", (pad_w, pad_h), color=black)
	        padded.paste(canvas, (0, 0))
	        canvas = padded

	    return np.asarray(canvas)


	@lru_cache(maxsize=128)
	def get_video_status_plot_base(repo_id, filename, traj_id, valid_window_len):
	    """Render the cursor-free status plot once for video export.

	    Draws ``no_teacher_action`` and the valid-start markers, then records
	    where the axes area sits in the rendered image so a cursor can be
	    drawn on copies later without re-running Matplotlib.

	    Returns:
	        ``(base, (x0, y0, x1, y1), total_steps)``: the RGB image, the axes
	        bounds in image pixel coordinates (top-left origin) and the
	        trajectory length. For an empty trajectory:
	        ``(None, (0, 0, 1, 1), 0)``.
	    """
	    traj = load_traj(repo_id, filename, int(traj_id))
	    n_steps = len(traj)
	    if n_steps == 0:
	        return None, (0, 0, 1, 1), 0

	    window = int(valid_window_len)
	    no_teacher, valid_starts = _compute_valid_start_indices(traj, window)

	    fig, ax = plt.subplots(figsize=VIDEO_STATUS_FIGSIZE, dpi=VIDEO_STATUS_DPI)
	    ax.step(
	        np.arange(n_steps), no_teacher,
	        where="post", label="no_teacher_action", color="orange",
	    )

	    if valid_starts:
	        marker_y = [-0.15] * len(valid_starts)
	        ax.scatter(
	            valid_starts, marker_y,
	            color="green", marker="^", s=18,
	            label="Valid Start (len >= {})".format(window),
	        )

	    ax.set_xlim(0, max(n_steps - 1, 1))
	    ax.set_ylim(-0.38, 1.1)
	    ax.set_ylabel("Flag", fontsize=8)
	    ax.set_xlabel("Timestep index", fontsize=8)
	    ax.set_yticks([0, 1])
	    ax.set_yticklabels(["False", "True"])
	    ax.grid(True, axis="x", alpha=0.2)
	    ax.set_title("no_teacher_action and valid starts", fontsize=9)
	    ax.tick_params(axis="both", labelsize=7)
	    ax.legend(loc="upper right", fontsize=7)
	    fig.tight_layout()
	    fig.canvas.draw()

	    base = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()

	    # The extent's origin is bottom-left; flip y for the top-left image origin.
	    extent = ax.get_window_extent()
	    img_h = base.shape[0]
	    left = int(round(extent.x0))
	    right = int(round(extent.x1))
	    top = int(round(img_h - extent.y1))
	    bottom = int(round(img_h - extent.y0))

	    plt.close(fig)
	    return base, (left, top, right, bottom), n_steps


	@lru_cache(maxsize=8192)
	def get_cached_video_status_frame(repo_id, filename, traj_id, timestep, valid_window_len):
	    """Memoised status frame: the static base plot plus a cursor and label.

	    Returns an RGB array, or None when the base plot is unavailable.
	    """
	    base, bounds, total_steps = get_video_status_plot_base(
	        repo_id, filename, int(traj_id), int(valid_window_len)
	    )
	    if base is None:
	        return None

	    last_step = total_steps - 1
	    step = int(np.clip(int(timestep), 0, max(last_step, 0)))
	    left, top, right, bottom = bounds

	    # Map the step index linearly onto the horizontal axes extent.
	    span = max(last_step, 1)
	    cursor_x = int(round(left + (right - left) * float(step) / float(span)))

	    frame = Image.fromarray(np.asarray(base, dtype=np.uint8)).convert("RGB")
	    pen = ImageDraw.Draw(frame)
	    pen.line([(cursor_x, top), (cursor_x, bottom)], fill=(0, 0, 0), width=4)

	    # Step label on a white box in the top-left corner of the plot area.
	    pen.rectangle((left + 4, top + 4, left + 118, top + 24), fill=(255, 255, 255))
	    pen.text((left + 8, top + 7), "step {}/{}".format(step, last_step), fill=(0, 0, 0))

	    return np.asarray(frame)

	def _draw_status_cursor_on_base(base, bounds, total_steps, timestep):
	"""Fast video status frame: copy one static Matplotlib image and draw cursor.

	This avoids calling the lru-cached per-timestep status frame function during
	video export. For long trajectories, caching thousands of status images can
	consume a lot of memory and still requires PIL conversion for every frame.
	"""
	if base is None:
	return None

	total_steps = int(max(total_steps, 1))
	timestep = int(np.clip(int(timestep), 0, total_steps - 1))
	x0, y0, x1, y1 = [int(v) for v in bounds]
	denom = max(total_steps - 1, 1)
	x = int(round(x0 + (x1 - x0) * float(timestep) / float(denom)))

	img = np.asarray(base, dtype=np.uint8).copy()

	# Draw the vertical cursor directly with NumPy. This is much cheaper than
	# creating a Matplotlib plot for every frame.
	x_left = max(0, x - 2)
	x_right = min(img.shape[1], x + 2)
	y_top = max(0, y0)
	y_bottom = min(img.shape[0], y1)
	img[y_top:y_bottom, x_left:x_right, :] = 0

	# Small text label. PIL is used only for the label, not for the whole plot.
	pil_img = Image.fromarray(img).convert("RGB")
	draw = ImageDraw.Draw(pil_img)
	label = "step {}/{}".format(timestep, total_steps - 1)
	draw.rectangle((x0 + 4, y0 + 4, x0 + 126, y0 + 24), fill=(255, 255, 255))
	draw.text((x0 + 8, y0 + 7), label, fill=(0, 0, 0))
	return np.asarray(pil_img)


	def _get_fast_video_writer(out_path, fps):
	    """Open an imageio writer for ``out_path`` using x264's ultrafast preset."""
	    encoder_flags = [
	        "-preset", "ultrafast",
	        "-crf", "28",
	        "-pix_fmt", "yuv420p",
	        "-movflags", "+faststart",
	    ]
	    writer_options = {
	        "fps": float(fps),
	        "codec": "libx264",
	        "macro_block_size": 16,
	        "ffmpeg_params": encoder_flags,
	    }
	    return imageio.get_writer(out_path, **writer_options)


	def build_current_trajectory_video(preset_name, custom_repo_id, custom_filename, traj_id, image_keys, display_scale, reverse_channels, fps, valid_window_len, video_stride=4):
	    """Export one trajectory as an MP4 in the system temp directory.

	    Every ``video_stride``-th timestep is written, and the last timestep
	    is always included. Each frame shows the selected observation images
	    above the status plot; the static part of the plot is rendered once
	    and only the cursor is redrawn per frame.

	    Args:
	        preset_name, custom_repo_id, custom_filename: dataset selection,
	            resolved by ``resolve_dataset``.
	        traj_id: trajectory index; clamped to the available range.
	        image_keys: None, one observation key, or a sequence of keys.
	        display_scale: scale factor applied to the observation images.
	        reverse_channels: whether to flip the image channel order.
	        fps: frame rate of the output video.
	        valid_window_len: window length for the valid-start markers.
	        video_stride: timestep stride between frames (at least 1).

	    Returns:
	        ``(out_path, status)`` on success, or ``(None, message)`` when
	        imageio is missing or no trajectory can be loaded.
	    """
	    if imageio is None:
	        return None, "Video export requires imageio and imageio-ffmpeg in requirements.txt."

	    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
	    n_traj = get_num_trajectories(repo_id, filename)
	    if n_traj == 0:
	        return None, "No trajectories found."

	    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
	    traj = load_traj(repo_id, filename, traj_id)
	    if not traj:
	        return None, "Trajectory could not be loaded."

	    if image_keys is None:
	        image_keys = []
	    if isinstance(image_keys, str):
	        image_keys = [image_keys]
	    image_keys_tuple = tuple(image_keys)

	    last_index = len(traj) - 1
	    video_stride = int(max(1, int(video_stride)))
	    frame_indices = list(range(0, len(traj), video_stride))
	    # Always finish on the final timestep, even when the stride skips it.
	    if frame_indices and frame_indices[-1] != last_index:
	        frame_indices.append(last_index)

	    safe_repo = re.sub(r"[^A-Za-z0-9_.-]+", "_", repo_id)
	    safe_file = re.sub(r"[^A-Za-z0-9_.-]+", "_", filename)[-80:]
	    out_path = os.path.join(
	        tempfile.gettempdir(),
	        "trajectory_{}_{}_traj{:04d}_fps{}_stride{}.mp4".format(
	            safe_repo, safe_file, traj_id, int(fps), video_stride
	        ),
	    )

	    # Build the static status plot once. During export, only draw the cursor.
	    status_base, status_bounds, total_steps = get_video_status_plot_base(
	        repo_id,
	        filename,
	        traj_id,
	        int(valid_window_len),
	    )

	    writer = _get_fast_video_writer(out_path, fps)
	    written = 0
	    try:
	        for t in frame_indices:
	            # Reuse the cached image extraction, but skip the cached
	            # per-timestep status images to limit memory use.
	            gallery_items, _warnings = get_cached_gallery_items(
	                repo_id,
	                filename,
	                traj_id,
	                t,
	                image_keys_tuple,
	                float(display_scale),
	                bool(reverse_channels),
	            )
	            # Plain "|" separators: "\|" is not a valid escape sequence and
	            # would render a literal backslash.
	            label = "trajectory {} | frame {}/{}".format(traj_id, t, last_index)
	            status_plot = _draw_status_cursor_on_base(status_base, status_bounds, total_steps, t)
	            frame = _compose_video_frame(gallery_items, label, status_plot=status_plot)
	            writer.append_data(frame)
	            written += 1
	    finally:
	        writer.close()

	    approx_seconds = float(written) / float(max(float(fps), 1.0))
	    status = "Built trajectory video with optimized encoder and status rendering"
	    status += "\nTrajectory: {}".format(traj_id)
	    status += "\nOriginal timesteps: {} | Written frames: {} | Stride: {}".format(len(traj), written, video_stride)
	    status += "\nFPS: {} | Approx video duration: {:.1f}s".format(fps, approx_seconds)
	    status += "\nValid-window length: {}".format(int(valid_window_len))
	    status += "\nSpeedups: x264 ultrafast preset; static status plot rendered once; cursor drawn with NumPy/PIL"
	    return out_path, status

	def get_available_image_keys(repo_id, filename, traj_id):
	    """List the observation keys of a trajectory that look like images.

	    Detection uses the first timestep's observations. Keys listed in
	    ``PREFERRED_IMAGE_KEYS`` come first, in that order, followed by the
	    remaining detected keys. Returns an empty list when no trajectory is
	    available.
	    """
	    n_traj = get_num_trajectories(repo_id, filename)
	    if n_traj == 0:
	        return []

	    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
	    traj = load_traj(repo_id, filename, traj_id)
	    if not traj:
	        return []

	    candidates = []
	    for key, value in traj[0].get("obs", {}).items():
	        try:
	            is_image = _looks_like_image_array(key, value)
	        except Exception:
	            # Best effort: an entry that cannot be inspected is left out.
	            continue
	        if is_image:
	            candidates.append(key)

	    preferred = [key for key in PREFERRED_IMAGE_KEYS if key in candidates]
	    others = [key for key in candidates if key not in preferred]
	    return preferred + others


	def update_custom_visibility(preset_name):
	    """Return two visibility updates that are visible only in Custom mode."""
	    is_custom = preset_name == "Custom"
	    first_update = gr.update(visible=is_custom)
	    second_update = gr.update(visible=is_custom)
	    return first_update, second_update


	def update_after_dataset_change(preset_name, custom_repo_id, custom_filename):
	    """Compute the UI updates that follow a dataset selection change.

	    Returns a 5-tuple: an update resetting the trajectory control (value 0,
	    maximum from the trajectory count), an update resetting the timestep
	    control (value 0, maximum from the first trajectory's length), an
	    update with the image-key choices (first two preselected), a status
	    text, and an update carrying the preset's reverse-channels default.
	    Maxima are never below 1.
	    """
	    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
	    n_traj = get_num_trajectories(repo_id, filename)
	    reverse_default = get_default_reverse_channels(preset_name)

	    if n_traj:
	        keys = get_available_image_keys(repo_id, filename, 0)
	        n_steps = len(load_traj(repo_id, filename, 0))
	    else:
	        # Nothing to show: empty choices; both maxima fall back to 1 below.
	        keys = []
	        n_steps = 0

	    status = "\n".join([
	        "Loaded `{}` / `{}`".format(repo_id, filename),
	        "Detected trajectories: {}".format(n_traj),
	        "reverse_channels default: {}".format(int(reverse_default)),
	    ])

	    return (
	        gr.update(maximum=max(n_traj - 1, 1), value=0),
	        gr.update(maximum=max(n_steps - 1, 1), value=0),
	        gr.update(choices=keys, value=keys[:2]),
	        status,
	        gr.update(value=reverse_default),
	    )


	def update_after_traj_change(preset_name, custom_repo_id, custom_filename, traj_id):
	    """Compute the UI updates that follow a trajectory selection change.

	    Returns a pair: an update resetting the timestep control (value 0,
	    maximum from the trajectory length, never below 1) and an update with
	    the image-key choices (first two preselected).
	    """
	    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
	    n_traj = get_num_trajectories(repo_id, filename)
	    if n_traj == 0:
	        return gr.update(maximum=1, value=0), gr.update(choices=[], value=[])

	    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
	    n_steps = len(load_traj(repo_id, filename, traj_id))
	    keys = get_available_image_keys(repo_id, filename, traj_id)

	    timestep_update = gr.update(maximum=max(n_steps - 1, 1), value=0)
	    keys_update = gr.update(choices=keys, value=keys[:2])
	    return timestep_update, keys_update


	def render_frame(preset_name, custom_repo_id, custom_filename, traj_id, timestep, image_keys, chunk_len, display_scale, reverse_channels):
	    """Produce everything shown for one timestep of one trajectory.

	    ``traj_id`` and ``timestep`` are clamped to the available ranges.

	    Returns:
	        ``(gallery_items, status_plot, info_text)``: the display images,
	        the status plot image and a multi-line description of the step.
	        When nothing can be loaded, the first two are ``[]`` and ``None``
	        and the text explains the problem.
	    """
	    repo_id, filename = resolve_dataset(preset_name, custom_repo_id, custom_filename)
	    n_traj = get_num_trajectories(repo_id, filename)
	    if n_traj == 0:
	        return [], None, "No trajectory groups found. Open Debug: HDF5 tree."

	    traj_id = int(np.clip(int(traj_id), 0, n_traj - 1))
	    traj = load_traj(repo_id, filename, traj_id)
	    if not traj:
	        return [], None, "Trajectory could not be loaded. Open Debug: HDF5 tree."

	    last_index = len(traj) - 1
	    timestep = int(np.clip(int(timestep), 0, last_index))
	    chunk_len = int(chunk_len)
	    display_scale = float(display_scale)

	    # Accept None, a single key, or a sequence of keys.
	    if image_keys is None:
	        selected_keys = []
	    elif isinstance(image_keys, str):
	        selected_keys = [image_keys]
	    else:
	        selected_keys = image_keys

	    step = traj[timestep]

	    gallery_items, image_warnings = get_cached_gallery_items(
	        repo_id, filename, traj_id, timestep,
	        tuple(selected_keys), display_scale, bool(reverse_channels),
	    )
	    status_plot, is_valid_start, num_valid_starts = get_cached_status_plot(
	        repo_id, filename, traj_id, timestep, chunk_len
	    )

	    def flag(name):
	        # Step flag rendered as 0/1.
	        return int(bool(step.get(name, False)))

	    info_lines = [
	        "dataset: {} / {}".format(repo_id, filename),
	        "detected trajectories: {}".format(n_traj),
	        "trajectory: {}".format(traj_id),
	        "episode_id: {}".format(step.get("episode_id", "")),
	        "timestep: {} / {}".format(timestep, last_index),
	        "saved timestep: {}".format(step.get("timestep", timestep)),
	        "done: {}".format(flag("done")),
	        "if_success: {}".format(flag("if_success")),
	        "no_teacher_action: {}".format(flag("no_teacher_action")),
	        "no_robot_action: {}".format(flag("no_robot_action")),
	        "valid-window length: {}".format(chunk_len),
	        "valid_start: {}".format(int(bool(is_valid_start))),
	        "num_valid_starts: {}".format(num_valid_starts),
	        "",
	        "teacher_action: {}".format(_safe_array_str(step.get("teacher_action", []))),
	        "robot_action: {}".format(_safe_array_str(step.get("robot_action", []))),
	        "",
	        "selected image tensors:",
	    ]

	    # Raw shape and dtype of each selected observation that exists.
	    for key in selected_keys:
	        if key in step.get("obs", {}):
	            tensor = np.asarray(step["obs"][key])
	            info_lines.append(
	                "{} shape={} dtype={}".format(key, tuple(tensor.shape), tensor.dtype)
	            )

	    if image_warnings:
	        info_lines += ["", "Image warnings:"]
	        info_lines += list(image_warnings)

	    return gallery_items, status_plot, "\n".join(info_lines)


	def build_app():
	    """Assemble the Gradio Blocks UI for the trajectory viewer and return it.

	    The default preset is probed first to size the trajectory slider and seed
	    the image-key choices. If that probe raises, the UI is still built with
	    empty defaults and the exception ``repr`` is shown as a startup warning.
	    """
	    repo_id, filename = resolve_dataset(DEFAULT_PRESET)

	    startup_warning = ""
	    try:
	        n_traj = get_num_trajectories(repo_id, filename)
	        first_keys = get_available_image_keys(repo_id, filename, 0) if n_traj else []
	    except Exception as exc:
	        # Keep the app usable even when the default dataset cannot be read.
	        n_traj, first_keys = 0, []
	        startup_warning = repr(exc)

	    default_status = "Loaded default dataset\nDetected trajectories: {}\nreverse_channels default: {}".format(
	        n_traj,
	        int(get_default_reverse_channels(DEFAULT_PRESET)),
	    )

	    with gr.Blocks(title="HDF5 Trajectory Viewer") as demo:
	        # ---- Header -------------------------------------------------------
	        gr.Markdown(
	            "# HDF5 Trajectory Viewer\n\n"
	            "Standalone viewer for TrajectoryBuffer-style HDF5 datasets on Hugging Face.\n\n"
	            "The status plot matches the local labeling view: orange `no_teacher_action`, green valid-start markers, and a black timestep cursor."
	        )

	        if startup_warning:
	            gr.Markdown("Startup warning: `{}`".format(startup_warning))

	        # ---- Dataset selection --------------------------------------------
	        with gr.Row():
	            preset = gr.Dropdown(
	                choices=list(DATASET_PRESETS.keys()) + ["Custom"],
	                value=DEFAULT_PRESET,
	                label="Dataset preset",
	            )
	            # The custom fields start hidden; update_custom_visibility is
	            # wired to them on preset change below.
	            custom_repo_id = gr.Textbox(
	                value="",
	                label="Custom repo_id, e.g. Zhaoting123/InsertT",
	                visible=False,
	            )
	            custom_filename = gr.Textbox(
	                value="",
	                label="Custom HDF5 path in repo",
	                visible=False,
	            )

	        dataset_status = gr.Textbox(
	            label="Dataset status",
	            lines=2,
	            value=default_status,
	            interactive=False,
	        )

	        # ---- Navigation ---------------------------------------------------
	        with gr.Row():
	            traj_slider = gr.Slider(
	                minimum=0,
	                maximum=max(n_traj - 1, 1),
	                value=0,
	                step=1,
	                label="Trajectory index",
	            )
	            # Placeholder range; the load/change handlers update the maximum.
	            timestep_slider = gr.Slider(minimum=0, maximum=1, value=0, step=1, label="Timestep")

	        # ---- Rendering options --------------------------------------------
	        with gr.Row():
	            image_keys = gr.CheckboxGroup(choices=first_keys, value=first_keys[:2], label="Image keys")
	            chunk_len = gr.Slider(
	                minimum=1,
	                maximum=64,
	                value=DEFAULT_CHUNK_LEN,
	                step=1,
	                label="Valid-window length",
	            )
	            display_scale = gr.State(value=DEFAULT_DISPLAY_SCALE)
	            reverse_channels = gr.Checkbox(
	                value=get_default_reverse_channels(DEFAULT_PRESET),
	                label="Reverse channels BGR↔RGB",
	            )

	        # ---- Actions ------------------------------------------------------
	        with gr.Row():
	            render_btn = gr.Button("Render frame", variant="primary")
	            preload_btn = gr.Button("Preload current trajectory")
	            video_btn = gr.Button("Build trajectory video")
	            video_fps = gr.Slider(minimum=1, maximum=30, value=10, step=1, label="Video FPS")
	            video_stride = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Video frame stride")

	        preload_status = gr.Textbox(
	            label="Preload / video status",
	            lines=4,
	            value="Not preloaded yet.",
	            interactive=False,
	        )

	        # ---- Outputs ------------------------------------------------------
	        with gr.Row():
	            with gr.Column(scale=3):
	                gallery = gr.Gallery(
	                    label="Camera images",
	                    columns=2,
	                    height=360,
	                    object_fit="contain",
	                )
	            with gr.Column(scale=2):
	                status_plot = gr.Image(
	                    label="no_teacher_action + valid starts",
	                    type="numpy",
	                    height=360,
	                )

	        trajectory_video = gr.Video(label="Trajectory video: smooth browser-side playback")
	        info = gr.Textbox(label="Frame info", lines=16)

	        with gr.Accordion("Debug: HDF5 tree", open=False):
	            inspect_btn = gr.Button("Inspect HDF5 structure")
	            hdf5_tree = gr.Textbox(lines=24, label="HDF5 tree")

	        # ---- Event wiring -------------------------------------------------
	        # Shared component lists so every handler that reloads the dataset or
	        # re-renders the frame is registered with the same inputs and outputs.
	        dataset_inputs = [preset, custom_repo_id, custom_filename]
	        reload_event = dict(
	            fn=update_after_dataset_change,
	            inputs=dataset_inputs,
	            outputs=[traj_slider, timestep_slider, image_keys, dataset_status, reverse_channels],
	        )
	        render_event = dict(
	            fn=render_frame,
	            inputs=dataset_inputs + [
	                traj_slider,
	                timestep_slider,
	                image_keys,
	                chunk_len,
	                display_scale,
	                reverse_channels,
	            ],
	            outputs=[gallery, status_plot, info],
	        )

	        # Preset change: toggle the custom fields, reload metadata, re-render.
	        preset.change(
	            fn=update_custom_visibility,
	            inputs=preset,
	            outputs=[custom_repo_id, custom_filename],
	        ).then(**reload_event).then(**render_event)

	        # Submitting either custom field reloads the dataset metadata.
	        for custom_box in (custom_repo_id, custom_filename):
	            custom_box.submit(**reload_event)

	        # Trajectory change: refresh timestep range and keys, then re-render.
	        traj_slider.change(
	            fn=update_after_traj_change,
	            inputs=dataset_inputs + [traj_slider],
	            outputs=[timestep_slider, image_keys],
	        ).then(**render_event)

	        # The timestep slider renders on release rather than on change.
	        timestep_slider.release(**render_event)

	        for widget in (image_keys, chunk_len, reverse_channels):
	            widget.change(**render_event)

	        render_btn.click(**render_event)

	        preload_btn.click(
	            fn=preload_current_trajectory,
	            inputs=dataset_inputs + [traj_slider, image_keys, chunk_len, display_scale, reverse_channels],
	            outputs=preload_status,
	        )

	        video_btn.click(
	            fn=build_current_trajectory_video,
	            inputs=dataset_inputs + [
	                traj_slider,
	                image_keys,
	                display_scale,
	                reverse_channels,
	                video_fps,
	                chunk_len,
	                video_stride,
	            ],
	            outputs=[trajectory_video, preload_status],
	        )

	        inspect_btn.click(
	            fn=inspect_hdf5_tree,
	            inputs=dataset_inputs,
	            outputs=hdf5_tree,
	        )

	        # On page load: reload the dataset metadata, then render the frame.
	        demo.load(**reload_event).then(**render_event)

	    return demo


	if __name__ == "__main__":
	    demo = build_app()
	    # Bind to all interfaces; the port comes from the PORT environment
	    # variable and falls back to 7860 when it is not set.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": int(os.environ.get("PORT", 7860)),
	        "share": False,
	        "ssr_mode": False,
	    }
	    demo.launch(**launch_options)