Spaces:

Daankular
/

MeshForge

Runtime error

App Files Files Community

Daankular committed 12 days ago

Commit

b6c4195

verified ·

1 Parent(s): ffdf67b

Upload pipeline/tpose_smpl.py with huggingface_hub

Browse files

Files changed (1) hide show

pipeline/tpose_smpl.py +413 -0

pipeline/tpose_smpl.py ADDED Viewed

	@@ -0,0 +1,413 @@

+"""
+tpose_smpl.py -- T-pose a humanoid GLB via inverse Linear Blend Skinning.
+Pipeline:
+  1. Render front view of the body mesh
+  2. Run HMR2 on the render -> SMPL body_pose + betas
+  3. Read rigged.glb: mesh verts (rig world space), skinning weights, T-pose joints
+  4. Compute FK transforms in rig world space using HMR2 body_pose
+  5. Apply inverse LBS: v_tpose = (Sum_j W_j * A_j)^-1 * v_posed
+  6. Map T-posed verts back to original mesh coordinate space, preserve UV/texture
+  7. Optionally export SKEL bone mesh in T-pose
+Usage:
+    python tpose_smpl.py --body /tmp/triposg_textured.glb \
+                         --rig  /tmp/rig_out/rigged.glb \
+                         --out  /tmp/tposed_surface.glb \
+                         [--skel_out /tmp/tposed_bones.glb] \
+                         [--debug_dir /tmp/tpose_debug]
+"""
+import os, sys, argparse, struct, json, warnings
+warnings.filterwarnings('ignore')
+import numpy as np
+import cv2
+import torch
+import trimesh
+from trimesh.visual.texture import TextureVisuals
+from trimesh.visual.material import PBRMaterial
+from scipy.spatial.transform import Rotation as R
+sys.path.insert(0, '/root/MV-Adapter')
# Filesystem locations of the body-model assets referenced by this script.
SMPL_NEUTRAL = '/root/body_models/smpl/SMPL_NEUTRAL.pkl'
SKEL_DIR = '/root/body_models/skel'

# Parent joint index for each of the 24 joints; -1 marks the root.
# Every parent index is smaller than its child's index, so one forward pass
# over the list always visits a parent before any of its children.
SMPL_PARENTS = [
    -1,                        # joint 0 (root)
    0, 0, 0,                   # joints 1-3
    1, 2, 3,                   # joints 4-6
    4, 5, 6,                   # joints 7-9
    7, 8, 9, 9, 9,             # joints 10-14
    12, 13, 14,                # joints 15-17
    16, 17, 18, 19, 20, 21,    # joints 18-23
]
+# ---- Step 1: Render front view -----------------------------------------------
def render_front(body_glb, H=1024, W=768, device='cuda'):
    """
    Render one orthographic view of `body_glb` and return it as a uint8
    image in BGR channel order (converted with cv2.COLOR_RGB2BGR).

    body_glb : mesh path handed to MV-Adapter's load_mesh (loaded with rescale=True)
    H, W     : output image height / width in pixels
    device   : device string forwarded to the rasteriser context, the mesh
               loader and the camera
    """
    # Imported lazily; MV-Adapter is only reachable through the sys.path
    # entry this module adds at import time.
    from mvadapter.utils.mesh_utils import (
        NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, render,
    )

    raster_ctx = NVDiffRastContextWrapper(device=device, context_type='cuda')
    body_mesh = load_mesh(body_glb, rescale=True, device=device)

    # NOTE(review): the frustum is square (+-0.55 on both axes) while the
    # default image is 768x1024 -- confirm the resulting aspect ratio is intended.
    ortho_cam = get_orthogonal_camera(
        elevation_deg=[0], distance=[1.8],
        left=-0.55, right=0.55, bottom=-0.55, top=0.55,
        azimuth_deg=[-90], device=device,
    )

    rendered = render(
        raster_ctx, body_mesh, ortho_cam,
        height=H, width=W,
        render_attr=True, render_depth=False, render_normal=False,
        attr_background=0.5,
    )

    # Take the first rendered view, scale to 0..255 and quantise to 8 bits.
    rgb_float = rendered.attr[0].cpu().numpy()
    rgb_u8 = (rgb_float * 255).clip(0, 255).astype(np.uint8)
    return cv2.cvtColor(rgb_u8, cv2.COLOR_RGB2BGR)
+# ---- Step 2: HMR2 pose estimation --------------------------------------------
def run_hmr2(img_bgr, device='cuda'):
    """
    Detect the largest person box in `img_bgr` and regress SMPL parameters
    for it with HMR2.

    img_bgr : image passed unchanged to both the detector and ViTDetDataset
    device  : device the HMR2 model and the input batch are moved to

    Returns a dict of CPU tensors taken from the first batch element of
    `pred_smpl_params`:
      'body_pose' : (23, 3, 3)
      'betas'     : (10,)
    Raises RuntimeError when no detection of class 0 scores above 0.5.
    """
    from pathlib import Path
    from hmr2.configs import CACHE_DIR_4DHUMANS
    from hmr2.models import load_hmr2, DEFAULT_CHECKPOINT, download_models
    from hmr2.utils import recursive_to
    from hmr2.datasets.vitdet_dataset import ViTDetDataset
    from hmr2.utils.utils_detectron2 import DefaultPredictor_Lazy
    from detectron2.config import LazyConfig
    import hmr2 as hmr2_pkg

    # --- HMR2 regressor -----------------------------------------------------
    download_models(CACHE_DIR_4DHUMANS)
    regressor, regressor_cfg = load_hmr2(DEFAULT_CHECKPOINT)
    regressor = regressor.to(device).eval()

    # --- ViTDet detector, configured from the file shipped inside hmr2 ------
    vitdet_cfg_file = Path(hmr2_pkg.__file__).parent / 'configs' / 'cascade_mask_rcnn_vitdet_h_75ep.py'
    vitdet_cfg = LazyConfig.load(str(vitdet_cfg_file))
    vitdet_cfg.train.init_checkpoint = (
        'https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h'
        '/f328730692/model_final_f05665.pkl'
    )
    # Same score threshold on each of the three box predictors.
    for stage in range(3):
        vitdet_cfg.model.roi_heads.box_predictors[stage].test_score_thresh = 0.25
    person_detector = DefaultPredictor_Lazy(vitdet_cfg)

    # --- Detection: keep class-0 boxes with score > 0.5 ---------------------
    detections = person_detector(img_bgr)['instances']
    keep = (detections.pred_classes == 0) & (detections.scores > 0.5)
    boxes = detections.pred_boxes.tensor[keep].cpu().numpy()
    if len(boxes) == 0:
        raise RuntimeError('HMR2: no person detected in render')

    # Keep only the box with the largest area (still 2-D, one row).
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    biggest = (widths * heights).argmax()
    boxes = boxes[biggest:biggest + 1]

    # --- Regression on the cropped person -----------------------------------
    crops = ViTDetDataset(regressor_cfg, img_bgr, boxes)
    loader = torch.utils.data.DataLoader(crops, batch_size=1, shuffle=False, num_workers=0)
    for batch in loader:
        batch = recursive_to(batch, device)
        with torch.no_grad():
            prediction = regressor(batch)
        smpl_params = prediction['pred_smpl_params']
        # Returns on the first batch; later batches (if any) are never read.
        return {
            'body_pose': smpl_params['body_pose'][0].cpu(),    # (23, 3, 3)
            'betas':     smpl_params['betas'][0].cpu(),         # (10,)
        }
+# ---- Step 3: Read all data from rigged.glb -----------------------------------
def read_rigged_glb(rig_glb):
    """
    Parse a binary glTF (.glb) rig and return the data needed for inverse LBS.

    Only the first primitive of the first mesh and the first skin are read.

    Returns dict with:
      verts        : (N, 3) float64 mesh vertices in rig world space (POSITION)
      j_idx        : (N, 4) int joint indices (JOINTS_0)
      w_arr        : (N, 4) float64 skinning weights (WEIGHTS_0); every row with
                     a positive sum is normalised to sum to 1
      J_bind       : (24, 3) float64 T-pose joint world positions, obtained by
                     accumulating node translations along SMPL_PARENTS

    Raises ValueError when the file is not a GLB container with a JSON chunk
    followed by a BIN chunk, or when the skin does not list exactly
    len(SMPL_PARENTS) joints.
    """
    with open(rig_glb, 'rb') as fh:
        raw = fh.read()

    # GLB layout: 12-byte header, then chunks of <u32 length, u32 type, payload>.
    if len(raw) < 20 or raw[:4] != b'glTF':
        raise ValueError('%s is not a binary glTF (.glb) file' % rig_glb)
    json_len, json_type = struct.unpack_from('<II', raw, 12)
    if json_type != 0x4E4F534A:                      # ASCII 'JSON'
        raise ValueError('%s: first GLB chunk is not JSON' % rig_glb)
    gltf = json.loads(raw[20:20 + json_len])

    bin_header = 20 + json_len
    if len(raw) < bin_header + 8:
        raise ValueError('%s: GLB has no binary (BIN) chunk' % rig_glb)
    bin_len, bin_type = struct.unpack_from('<II', raw, bin_header)
    if bin_type != 0x004E4942:                       # ASCII 'BIN\0'
        raise ValueError('%s: second GLB chunk is not BIN' % rig_glb)
    # memoryview: slice the payload without copying the whole buffer, and
    # honour the declared chunk length instead of reading to end-of-file.
    bin_data = memoryview(raw)[bin_header + 8:bin_header + 8 + bin_len]

    n_components = {'SCALAR': 1, 'VEC2': 2, 'VEC3': 3, 'VEC4': 4, 'MAT4': 16}
    # glTF stores little-endian data, so spell the byte order out explicitly.
    component_dtype = {5120: '<i1', 5121: '<u1', 5122: '<i2',
                       5123: '<u2', 5125: '<u4', 5126: '<f4'}

    def _read(acc_i):
        """Return accessor `acc_i` as a read-only (count, n_components) array."""
        acc = gltf['accessors'][acc_i]
        view = gltf['bufferViews'][acc['bufferView']]
        dtype = np.dtype(component_dtype[acc['componentType']])
        n = n_components[acc['type']]
        cnt = acc['count']
        off = view.get('byteOffset', 0) + acc.get('byteOffset', 0)
        packed = n * dtype.itemsize
        stride = view.get('byteStride') or packed
        if stride == packed:
            return np.frombuffer(bin_data, dtype=dtype,
                                 count=cnt * n, offset=off).reshape(cnt, n)
        # Interleaved attributes: consecutive elements are `stride` bytes apart.
        return np.ndarray((cnt, n), dtype=dtype, buffer=bin_data,
                          offset=off, strides=(stride, dtype.itemsize))

    # .astype() copies, so the results are writable and independent of `raw`.
    prim  = gltf['meshes'][0]['primitives'][0]['attributes']
    verts = _read(prim['POSITION']).astype(np.float64)   # (N, 3)
    j_idx = _read(prim['JOINTS_0']).astype(int)          # (N, 4)
    w_arr = _read(prim['WEIGHTS_0']).astype(np.float64)  # (N, 4)
    row_sum = w_arr.sum(axis=1, keepdims=True)
    # Rows whose weights sum to 0 are divided by 1 and therefore stay all-zero.
    w_arr /= np.where(row_sum > 0, row_sum, 1.0)

    # Read T-pose joint world positions by accumulating node translations.
    nodes   = gltf['nodes']
    j_nodes = gltf['skins'][0]['joints']
    n_joints = len(SMPL_PARENTS)
    if len(j_nodes) != n_joints:
        raise ValueError('%s: skin has %d joints, expected %d'
                         % (rig_glb, len(j_nodes), n_joints))
    J_bind = np.zeros((n_joints, 3), dtype=np.float64)
    for ji, ni in enumerate(j_nodes):
        # NOTE: only the node 'translation' is used; node rotation, scale or
        # 'matrix' entries are ignored, and the skin's joint order is assumed
        # to follow SMPL_PARENTS.
        t_local = np.asarray(nodes[ni].get('translation', [0, 0, 0]),
                             dtype=np.float64)
        p = SMPL_PARENTS[ji]
        J_bind[ji] = t_local if p < 0 else J_bind[p] + t_local

    print('  Rig verts: %d  Y: [%.3f, %.3f]  X: [%.3f, %.3f]' % (
        len(verts),
        verts[:,1].min(), verts[:,1].max(),
        verts[:,0].min(), verts[:,0].max()))
    print('  J_bind pelvis: (%.3f, %.3f, %.3f)  L_shoulder: (%.3f, %.3f, %.3f)' % (
        *J_bind[0], *J_bind[16]))
    return {'verts': verts, 'j_idx': j_idx, 'w_arr': w_arr, 'J_bind': J_bind}
+# ---- Step 4: FK in rig world space -> A matrices -----------------------------
+_FLIP_X = np.diag([-1.0, 1.0, 1.0])   # X-axis mirror matrix
+def _adapt_rotmat_to_flipped_x(R_smpl):
+    """
+    Convert an SO(3) rotation matrix from SMPL convention (left=+X)
+    to rig convention (left=-X).  F @ R @ F  where F = diag(-1,1,1).
+    """
+    return _FLIP_X @ R_smpl @ _FLIP_X
def compute_rig_fk_transforms(J_bind, body_pose_rotmats):
    """
    Compute A_j = G_j_posed * IBM_j in rig world space.
    A_j maps T-pose -> posed, so A_j^{-1} maps posed -> T-pose.

    HMR2 returns rotations in SMPL convention (left shoulder at +X).
    The rig uses the opposite convention (left shoulder at -X).
    Each rotation is converted by conjugating with the X-flip matrix before
    it enters the forward-kinematics chain.

    J_bind           : (24, 3) T-pose joint world positions from rig
    body_pose_rotmats: (23, 3, 3) body pose rotation matrices for joints 1-23,
                       as a torch tensor or anything np.asarray accepts.
                       The root joint (index 0) is given the identity rotation.
    Returns A: (24, 4, 4) float64
    Raises ValueError when either input does not have the shape above.
    """
    n_joints = len(SMPL_PARENTS)

    J_bind = np.asarray(J_bind, dtype=np.float64)
    if J_bind.shape != (n_joints, 3):
        raise ValueError('J_bind must have shape (%d, 3), got %s'
                         % (n_joints, J_bind.shape))

    # Accept torch tensors (possibly on GPU / tracking gradients) as well as
    # plain arrays; convert once instead of once per joint.
    if hasattr(body_pose_rotmats, 'detach'):
        body_pose_rotmats = body_pose_rotmats.detach().cpu().numpy()
    rotmats = np.asarray(body_pose_rotmats, dtype=np.float64)
    if rotmats.shape != (n_joints - 1, 3, 3):
        raise ValueError('body_pose_rotmats must have shape (%d, 3, 3), got %s'
                         % (n_joints - 1, rotmats.shape))

    # Forward kinematics: G[j] is the posed world transform of joint j.
    # SMPL_PARENTS lists every parent before its children, so G[p] is ready
    # by the time joint j is processed.
    G = np.empty((n_joints, 4, 4), dtype=np.float64)
    for j, p in enumerate(SMPL_PARENTS):
        R_smpl = rotmats[j - 1] if j >= 1 else np.eye(3)
        local = np.eye(4, dtype=np.float64)
        # Convert rotation from SMPL (+X=left) to rig (-X=left) convention.
        local[:3, :3] = _adapt_rotmat_to_flipped_x(R_smpl)
        # Root: absolute world position; others: offset from the parent joint.
        local[:3, 3] = J_bind[j] if p < 0 else J_bind[j] - J_bind[p]
        G[j] = local if p < 0 else G[p] @ local

    # Inverse bind matrices are pure translations by -J_bind[j].
    IBM = np.tile(np.eye(4, dtype=np.float64), (n_joints, 1, 1))
    IBM[:, :3, 3] = -J_bind
    return G @ IBM
+# ---- Step 5: Inverse LBS -----------------------------------------------------
def inverse_lbs(verts, j_idx, w_arr, A):
    """
    v_tpose = (Sum_j W_j * A_j)^{-1} * v_posed
    All inputs in rig world space.

    verts : (N, 3) posed vertex positions
    j_idx : (N, K) integer joint indices into A
    w_arr : (N, K) skinning weights; influences with weight <= 1e-6 are ignored
    A     : (J, 4, 4) per-joint skinning matrices

    A vertex with no influence above the cut-off has an all-zero blended
    matrix, which cannot be inverted; such vertices get the identity
    transform and are returned unchanged instead of aborting the whole mesh.

    Returns (N, 3) T-posed vertices.
    """
    verts = np.asarray(verts, dtype=np.float64)
    j_idx = np.asarray(j_idx)
    w_arr = np.asarray(w_arr, dtype=np.float64)
    N = len(verts)

    # Blend forward transforms, one influence slot at a time so the peak
    # temporary stays at (N, 4, 4).
    T_fwd = np.zeros((N, 4, 4), dtype=np.float64)
    active = w_arr > 1e-6
    for k in range(w_arr.shape[1]):
        mask = active[:, k]
        if mask.any():
            # Joint indices of masked-out influences are never dereferenced.
            T_fwd[mask] += w_arr[mask, k, None, None] * A[j_idx[mask, k]]

    unskinned = ~active.any(axis=1)
    if unskinned.any():
        T_fwd[unskinned] = np.eye(4)
        print('  inverse LBS: %d vertices have no skin weights; left unchanged'
              % int(unskinned.sum()))

    T_inv = np.linalg.inv(T_fwd)
    v_h   = np.concatenate([verts, np.ones((N, 1))], axis=1)
    v_tp  = np.einsum('nij,nj->ni', T_inv, v_h)[:, :3]

    disp = np.linalg.norm(v_tp - verts, axis=1)
    # mean()/max() are undefined on an empty mesh; report zeros instead.
    mean_disp, max_disp = (disp.mean(), disp.max()) if N else (0.0, 0.0)
    print('  inverse LBS: mean_disp=%.4f  max_disp=%.4f' % (mean_disp, max_disp))
    return v_tp
+# ---- Step 6: Map T-posed rig verts back to original mesh space ---------------
def rig_to_original_space(rig_verts_tposed, rig_verts_original, orig_mesh_verts):
    """
    Undo the uniform scale + translation that relates rig space to the
    original mesh space, i.e. invert  rig_vert = orig_vert * scale + offset.

    The transform is estimated from the two *un-T-posed* vertex sets:
      scale    : ratio of the Y extents (rig height / original height)
      offset Y : aligns the lowest Y value ("floor") of both sets
      offset X : aligns the X bounding-box centres
      offset Z : aligns the Z bounding-box centres
    and then applied in reverse to `rig_verts_tposed`.

    Returns T-posed vertices in original mesh coordinate space, with the
    same shape and dtype as `rig_verts_tposed`.
    """
    # Per-axis bounding boxes of both vertex sets.
    rig_lo, rig_hi = rig_verts_original.min(axis=0), rig_verts_original.max(axis=0)
    src_lo, src_hi = orig_mesh_verts.min(axis=0), orig_mesh_verts.max(axis=0)

    rig_h  = rig_hi[1] - rig_lo[1]
    orig_h = src_hi[1] - src_lo[1]
    scale  = rig_h / max(orig_h, 1e-6)    # guard against a flat original mesh

    rig_mid = (rig_hi + rig_lo) * 0.5
    src_mid = (src_hi + src_lo) * 0.5

    # Y is anchored at the floor, X and Z at the bounding-box centre.
    x_offset = rig_mid[0] - src_mid[0] * scale
    y_offset = rig_lo[1] - src_lo[1] * scale
    z_offset = rig_mid[2] - src_mid[2] * scale
    print('  rig->orig: scale=%.4f  offset=[%.3f, %.3f, %.3f]' % (scale, x_offset, y_offset, z_offset))

    # Invert: orig_vert = (rig_vert - offset) / scale.  The T-posed vertices
    # live in the same rig space, so the same inversion applies to them.
    tposed_orig = np.zeros_like(rig_verts_tposed)
    for axis, shift in enumerate((x_offset, y_offset, z_offset)):
        tposed_orig[:, axis] = (rig_verts_tposed[:, axis] - shift) / scale
    return tposed_orig
+# ---- SKEL bone geometry ------------------------------------------------------
def export_skel_bones(betas, out_path, gender='male'):
    """
    Best-effort export of the SKEL bone mesh in its zero pose.

    betas    : torch tensor of shape coefficients; the first 10 are used
    out_path : file the bone mesh is exported to
    gender   : selects 'skel_<gender>.pkl' inside SKEL_DIR

    Returns `out_path` on success.  Returns None (after printing the reason)
    when the `skel` package cannot be imported, the weights file is missing,
    or anything fails while building or exporting the mesh.
    """
    try:
        from skel.skel_model import SKEL
    except ImportError:
        print('  [skel] Not installed')
        return None

    weights_path = os.path.join(SKEL_DIR, 'skel_%s.pkl' % gender)
    if not os.path.exists(weights_path):
        print('  [skel] Weights not found: %s' % weights_path)
        return None

    # Deliberately broad: a failure here must not abort the caller, which has
    # already written its main output.
    try:
        model = SKEL(gender=gender, model_path=SKEL_DIR)
        shape_coeffs = betas.unsqueeze(0)[:, :10]
        zero_pose = torch.zeros(1, 46)
        zero_trans = torch.zeros(1, 3)
        with torch.no_grad():
            result = model(poses=zero_pose, betas=shape_coeffs,
                           trans=zero_trans, skelmesh=True)
        vertices = result.skel_verts[0].numpy()
        faces = model.skel_f.numpy()
        bone_mesh = trimesh.Trimesh(vertices=vertices, faces=faces, process=False)
        bone_mesh.export(out_path)
        print('  [skel] Bone mesh -> %s  (%d verts)' % (out_path, len(vertices)))
        return out_path
    except Exception as e:
        print('  [skel] Export failed: %s' % e)
        return None
+# ---- Main --------------------------------------------------------------------
def _zero_pose_identity_error(rig_data, n_check=3):
    """
    Sanity check: with identity rotations every skinning matrix should be the
    identity.  Returns max |T - I| of the blended transform over the first
    `n_check` rig vertices (0.0 when the rig has no vertices).
    """
    n_check = min(n_check, len(rig_data['verts']))
    if n_check == 0:
        return 0.0
    A_zero = compute_rig_fk_transforms(rig_data['J_bind'],
                                       torch.zeros(23, 3, 3) + torch.eye(3))
    T_blend = np.zeros((n_check, 4, 4))
    for k in range(rig_data['j_idx'].shape[1]):
        ji = rig_data['j_idx'][:n_check, k]
        w = rig_data['w_arr'][:n_check, k]
        T_blend += w[:, None, None] * A_zero[ji]
    return np.abs(T_blend - np.eye(4)).max()


def tpose_smpl(body_glb, out_glb, rig_glb=None, debug_dir=None, skel_out=None):
    """
    Run the full T-posing pipeline and write the result to `out_glb`.

    body_glb  : textured body mesh; rendered for pose estimation and used as
                the source of faces, UVs and texture for the output
    out_glb   : output path for the T-posed, textured mesh
    rig_glb   : rigged GLB from the Rig step (required despite the default)
    debug_dir : optional directory for the intermediate render; created if
                it does not exist
    skel_out  : optional output path for the SKEL bone mesh

    Returns `out_glb`.
    Raises RuntimeError when `rig_glb` is missing, the body file contains no
    geometry or no UV coordinates, or the rig and body meshes have different
    vertex counts.
    """
    device = 'cuda'
    if not rig_glb or not os.path.exists(rig_glb):
        raise RuntimeError('--rig is required: provide the rigged.glb from the Rig step.')

    print('[tpose_smpl] Rendering front view ...')
    img_bgr = render_front(body_glb, device=device)
    if debug_dir:
        # cv2.imwrite only returns False when the directory is missing, so
        # make sure it exists for library callers as well as the CLI.
        os.makedirs(debug_dir, exist_ok=True)
        cv2.imwrite(os.path.join(debug_dir, 'tpose_render.png'), img_bgr)

    print('[tpose_smpl] Running HMR2 pose estimation ...')
    hmr2_out = run_hmr2(img_bgr, device=device)
    print('  betas: %s' % hmr2_out['betas'].numpy().round(3))

    print('[tpose_smpl] Reading rigged GLB (rig world space) ...')
    rig_data = read_rigged_glb(rig_glb)

    print('[tpose_smpl] Loading original mesh for UV/texture ...')
    scene = trimesh.load(body_glb)
    if isinstance(scene, trimesh.Scene):
        # Only the first geometry of a scene is processed.
        geom_name = next(iter(scene.geometry), None)
        if geom_name is None:
            raise RuntimeError('Body GLB contains no geometry: %s' % body_glb)
        orig_mesh = scene.geometry[geom_name]
    else:
        orig_mesh = scene
        geom_name = None
    orig_verts = np.array(orig_mesh.vertices, dtype=np.float64)
    if getattr(orig_mesh.visual, 'uv', None) is None:
        raise RuntimeError('Body mesh has no UV coordinates: %s' % body_glb)
    uvs        = np.array(orig_mesh.visual.uv, dtype=np.float64)
    orig_tex   = orig_mesh.visual.material.baseColorTexture
    print('  Orig mesh: %d verts  Y: [%.3f, %.3f]  X: [%.3f, %.3f]' % (
        len(orig_verts),
        orig_verts[:,1].min(), orig_verts[:,1].max(),
        orig_verts[:,0].min(), orig_verts[:,0].max()))

    # The T-posed rig vertices replace the body vertices one-for-one while the
    # body's faces and UVs are kept, so both meshes must share vertex count.
    n_rig = len(rig_data['verts'])
    if n_rig != len(orig_verts):
        raise RuntimeError(
            'Vertex count mismatch: rig has %d verts, body mesh has %d; '
            'cannot write T-posed rig vertices back onto the body mesh.'
            % (n_rig, len(orig_verts)))

    print('[tpose_smpl] Computing FK transforms in rig world space ...')
    body_pose_rotmats = hmr2_out['body_pose']   # (23, 3, 3)
    A = compute_rig_fk_transforms(rig_data['J_bind'], body_pose_rotmats)

    # Verify zero-pose gives identity (sanity check, reported only).
    identity_err = _zero_pose_identity_error(rig_data)
    print('  zero-pose identity check: max_err=%.6f (expect ~0)' % identity_err)

    print('[tpose_smpl] Applying inverse LBS ...')
    rig_verts_tposed = inverse_lbs(
        rig_data['verts'], rig_data['j_idx'], rig_data['w_arr'], A)
    print('[tpose_smpl] T-posed rig verts: Y: [%.3f, %.3f]  X: [%.3f, %.3f]' % (
        rig_verts_tposed[:,1].min(), rig_verts_tposed[:,1].max(),
        rig_verts_tposed[:,0].min(), rig_verts_tposed[:,0].max()))

    print('[tpose_smpl] Mapping back to original mesh coordinate space ...')
    tposed_orig = rig_to_original_space(
        rig_verts_tposed, rig_data['verts'], orig_verts)
    print('[tpose_smpl] T-posed orig: Y: [%.3f, %.3f]  X: [%.3f, %.3f]' % (
        tposed_orig[:,1].min(), tposed_orig[:,1].max(),
        tposed_orig[:,0].min(), tposed_orig[:,0].max()))

    # Swap in the new positions and re-attach the original UVs / texture.
    orig_mesh.vertices = tposed_orig
    orig_mesh.visual = TextureVisuals(uv=uvs,
                                      material=PBRMaterial(baseColorTexture=orig_tex))
    if geom_name and isinstance(scene, trimesh.Scene):
        scene.geometry[geom_name] = orig_mesh
        scene.export(out_glb)
    else:
        orig_mesh.export(out_glb)
    print('[tpose_smpl] Saved: %s  (%d KB)' % (out_glb, os.path.getsize(out_glb)//1024))

    if skel_out:
        print('[tpose_smpl] Exporting SKEL bone geometry ...')
        export_skel_bones(hmr2_out['betas'], skel_out)
    return out_glb
if __name__ == '__main__':
    # Command-line entry point: parse paths and run the pipeline once.
    ap = argparse.ArgumentParser()
    ap.add_argument('--body',      required=True)
    ap.add_argument('--out',       required=True)
    ap.add_argument('--rig',       required=True, help='Rigged GLB from rig step')
    ap.add_argument('--skel_out',  default=None,  help='SKEL BSM bone mesh output')
    ap.add_argument('--debug_dir', default=None)
    args = ap.parse_args()
    # Create the debug directory up front so the render dump has a target.
    if args.debug_dir:
        os.makedirs(args.debug_dir, exist_ok=True)
    tpose_smpl(args.body, args.out, rig_glb=args.rig,
               debug_dir=args.debug_dir, skel_out=args.skel_out)