Spaces:
Running on Zero
Running on Zero
File size: 8,001 Bytes
8f1bcd9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | """
smpl.py
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Parse HumanML3D [T, 263] feature vectors into structured SMPL motion data.
HumanML3D 263-dim layout per frame
[0] root angular-velocity (Y-axis, rad/frame)
[1] root height Y (metres)
[2:4] root XZ velocity (local-frame, metres/frame)
[4:67] joint local positions joints 1-21 relative to root, 21Γ3 (unused here)
[67:193] 6D joint rotations joints 1-21, 21Γ6
[193:259] joint velocities joints 0-21, 22Γ3 (unused here)
[259:263] foot contact flags (unused here)
Root rotation = cumulative integral of dim[0] β Y-axis quaternion.
Root position = dim[1] (height) + integrated XZ velocity.
Joint 1-21 rot = dims 67:193 as 6D continuous rotation representation
[Zhou et al. 2019] β Gram-Schmidt β 3Γ3 rotation matrix β quaternion.
These are LOCAL rotations relative to the SMPL parent joint's rest
frame, where the canonical T-pose is the zero (identity) rotation.
"""
from __future__ import annotations
import numpy as np
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# 6D rotation helpers
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
def rot6d_to_matrix(r6d: np.ndarray) -> np.ndarray:
"""
[..., 6] β [..., 3, 3]
Reconstructs a rotation matrix from two columns using Gram-Schmidt.
The two columns are [a1 = r6d[..., 0:3], a2 = r6d[..., 3:6]].
"""
a1 = r6d[..., 0:3].astype(np.float64)
a2 = r6d[..., 3:6].astype(np.float64)
b1 = a1 / (np.linalg.norm(a1, axis=-1, keepdims=True) + 1e-12)
b2 = a2 - (b1 * a2).sum(axis=-1, keepdims=True) * b1
b2 = b2 / (np.linalg.norm(b2, axis=-1, keepdims=True) + 1e-12)
b3 = np.cross(b1, b2)
return np.stack([b1, b2, b3], axis=-1) # columns β [..., 3, 3]
def matrix_to_quat(mat: np.ndarray) -> np.ndarray:
"""
[..., 3, 3] β [..., 4] WXYZ quaternion, positive-W convention.
Uses scipy for numerical stability.
"""
from scipy.spatial.transform import Rotation
shape = mat.shape[:-2]
flat = mat.reshape(-1, 3, 3).astype(np.float64)
xyzw = Rotation.from_matrix(flat).as_quat() # scipy β XYZW
wxyz = xyzw[:, [3, 0, 1, 2]].astype(np.float32)
wxyz[wxyz[:, 0] < 0] *= -1 # positive-W
return wxyz.reshape(*shape, 4)
def rot6d_to_quat(r6d: np.ndarray) -> np.ndarray:
"""[..., 6] β [..., 4] WXYZ. Convenience: 6D β matrix β quaternion."""
return matrix_to_quat(rot6d_to_matrix(r6d))
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# Root motion recovery
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
def _qrot_vec(q: np.ndarray, v: np.ndarray) -> np.ndarray:
"""Rotate [N, 3] vectors by [N, 4] WXYZ quaternions (batch)."""
w, x, y, z = q[:, 0:1], q[:, 1:2], q[:, 2:3], q[:, 3:4]
vx, vy, vz = v[:, 0:1], v[:, 1:2], v[:, 2:3]
# Rodrigues-style: v + 2w*(q.xyz Γ v) + 2*(q.xyz Γ (q.xyz Γ v))
tx = 2 * (y * vz - z * vy)
ty = 2 * (z * vx - x * vz)
tz = 2 * (x * vy - y * vx)
return np.concatenate([
vx + w * tx + y * tz - z * ty,
vy + w * ty + z * tx - x * tz,
vz + w * tz + x * ty - y * tx,
], axis=-1)
def recover_root_motion(data: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""
Recover root world-space position and rotation from [T, 263] features.
Returns
-------
root_pos : [T, 3] world-space root position (Y = height above ground)
root_rot : [T, 4] WXYZ quaternion β Y-axis only (global facing direction)
"""
T = data.shape[0]
# Facing direction: integrate Y-axis angular velocity
theta = np.cumsum(data[:, 0].astype(np.float32))
half = theta * 0.5
root_rot = np.zeros((T, 4), dtype=np.float32)
root_rot[:, 0] = np.cos(half)
root_rot[:, 2] = np.sin(half)
# XZ velocity encoded in root-local frame β world frame
vel_local = np.stack([
data[:, 2].astype(np.float32),
np.zeros(T, dtype=np.float32),
data[:, 3].astype(np.float32),
], axis=-1)
vel_world = _qrot_vec(root_rot, vel_local)
root_pos = np.zeros((T, 3), dtype=np.float32)
root_pos[:, 0] = np.cumsum(vel_world[:, 0])
root_pos[:, 1] = data[:, 1]
root_pos[:, 2] = np.cumsum(vel_world[:, 2])
return root_pos, root_rot
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# SMPLMotion container
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
class SMPLMotion:
"""
Structured SMPL motion data parsed from a single HumanML3D clip.
Attributes
----------
root_pos : [T, 3] world-space root position (metres)
root_rot : [T, 4] WXYZ root Y-axis rotation (global facing)
local_rot : [T, 21, 4] WXYZ local quaternions for joints 1-21
T-pose = identity; relative to SMPL parent frame
fps : float capture frame rate (20 for HumanML3D)
"""
def __init__(
self,
root_pos: np.ndarray,
root_rot: np.ndarray,
local_rot: np.ndarray,
fps: float = 20.0,
):
self.root_pos = np.asarray(root_pos, dtype=np.float32)
self.root_rot = np.asarray(root_rot, dtype=np.float32)
self.local_rot = np.asarray(local_rot, dtype=np.float32)
self.fps = float(fps)
@property
def num_frames(self) -> int:
return self.root_pos.shape[0]
def slice(self, start: int = 0, end: int = -1) -> "SMPLMotion":
e = end if end > 0 else self.num_frames
return SMPLMotion(
self.root_pos[start:e],
self.root_rot[start:e],
self.local_rot[start:e],
self.fps,
)
def hml3d_to_smpl_motion(data: np.ndarray, fps: float = 20.0) -> SMPLMotion:
"""
Convert HumanML3D [T, 263] feature array to a SMPLMotion.
Uses the actual 6D rotation data (dims 67:193) β NOT position-derived
rotations. This preserves twist and gives physically correct limb poses.
Parameters
----------
data : [T, 263] raw HumanML3D features (e.g. from MoMask or dataset row)
fps : float frame rate (default 20 = HumanML3D native)
"""
data = np.asarray(data, dtype=np.float32)
if data.ndim != 2 or data.shape[1] < 193:
raise ValueError(f"Expected [T, >=193] but got {data.shape}")
T = data.shape[0]
root_pos, root_rot = recover_root_motion(data)
# 6D rotations for joints 1-21: dims [67:193] β [T, 21, 6]
r6d = data[:, 67:193].reshape(T, 21, 6)
local_rot = rot6d_to_quat(r6d) # [T, 21, 4] WXYZ
return SMPLMotion(root_pos, root_rot, local_rot, fps)
|