Daankular committed on
Commit
b6c4195
·
verified ·
1 Parent(s): ffdf67b

Upload pipeline/tpose_smpl.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. pipeline/tpose_smpl.py +413 -0
pipeline/tpose_smpl.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tpose_smpl.py -- T-pose a humanoid GLB via inverse Linear Blend Skinning.
3
+
4
+ Pipeline:
5
+ 1. Render front view and run HMR2 -> SMPL body_pose + betas
6
+ 2. Read rigged.glb: mesh verts (rig world space), skinning weights, T-pose joints
7
+ 3. Compute FK transforms in rig world space using HMR2 body_pose
8
+ 4. Apply inverse LBS: v_tpose = (Sum_j W_j * A_j)^-1 * v_posed
9
+ 5. Map T-posed verts back to original mesh coordinate space, preserve UV/texture
10
+ 6. Optionally export SKEL bone mesh in T-pose
11
+
12
+ Usage:
13
+ python tpose_smpl.py --body /tmp/triposg_textured.glb \
14
+ --rig /tmp/rig_out/rigged.glb \
15
+ --out /tmp/tposed_surface.glb \
16
+ [--skel_out /tmp/tposed_bones.glb] \
17
+ [--debug_dir /tmp/tpose_debug]
18
+ """
19
+
20
+ import os, sys, argparse, struct, json, warnings
21
+ warnings.filterwarnings('ignore')
22
+
23
+ import numpy as np
24
+ import cv2
25
+ import torch
26
+ import trimesh
27
+ from trimesh.visual.texture import TextureVisuals
28
+ from trimesh.visual.material import PBRMaterial
29
+ from scipy.spatial.transform import Rotation as R
30
+
31
# Make the MV-Adapter checkout importable; render_front() imports ``mvadapter``
# lazily from this location.
sys.path.insert(0, '/root/MV-Adapter')

# On-disk locations of the body-model assets.
SMPL_NEUTRAL = '/root/body_models/smpl/SMPL_NEUTRAL.pkl'
SKEL_DIR = '/root/body_models/skel'

# Parent joint index for each of the 24 joints of the kinematic tree.
# -1 marks the root (joint 0, printed as "pelvis" elsewhere in this file).
# Every parent index is smaller than its child's, so a single forward pass
# over the list visits parents before children.
SMPL_PARENTS = [
    -1,          # joint 0 (root)
    0, 0, 0,     # joints 1-3
    1, 2, 3,     # joints 4-6
    4, 5, 6,     # joints 7-9
    7, 8,        # joints 10-11
    9, 9, 9,     # joints 12-14
    12,          # joint 15
    13, 14,      # joints 16-17
    16, 17,      # joints 18-19
    18, 19,      # joints 20-21
    20, 21,      # joints 22-23
]
37
+
38
+
39
+ # ---- Step 1: Render front view -----------------------------------------------
40
+
41
def render_front(body_glb, H=1024, W=768, device='cuda'):
    """Render one orthographic view of ``body_glb`` and return it as a BGR image.

    The mesh is loaded (rescaled by the MV-Adapter loader) and rasterised with
    nvdiffrast through the MV-Adapter helpers, using a single orthographic
    camera at elevation 0 and azimuth -90 degrees.
    NOTE(review): azimuth -90 is presumably the "front" of the mesh in
    MV-Adapter's convention -- confirm against the loader.

    Args:
        body_glb: Path to the textured body GLB.
        H: Render height in pixels.
        W: Render width in pixels.
        device: Torch device string handed to the renderer.

    Returns:
        A ``uint8`` numpy image converted from RGB to BGR channel order
        (assumed to be (H, W, 3) -- TODO confirm against the renderer output).
    """
    # Imported lazily: mvadapter is only importable after the sys.path tweak
    # at module level and needs a CUDA-capable environment.
    from mvadapter.utils.mesh_utils import (
        NVDiffRastContextWrapper,
        get_orthogonal_camera,
        load_mesh,
        render,
    )

    raster_ctx = NVDiffRastContextWrapper(device=device, context_type='cuda')
    body_mesh = load_mesh(body_glb, rescale=True, device=device)
    ortho_cam = get_orthogonal_camera(
        elevation_deg=[0],
        distance=[1.8],
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=[-90],
        device=device,
    )
    rendered = render(
        raster_ctx, body_mesh, ortho_cam,
        height=H, width=W,
        render_attr=True, render_depth=False, render_normal=False,
        attr_background=0.5,
    )

    # First (only) view: scale to 0..255, clamp and quantise.
    rgb = rendered.attr[0].cpu().numpy()
    rgb_u8 = (rgb * 255).clip(0, 255).astype(np.uint8)
    return cv2.cvtColor(rgb_u8, cv2.COLOR_RGB2BGR)
57
+
58
+
59
+ # ---- Step 2: HMR2 pose estimation --------------------------------------------
60
+
61
def run_hmr2(img_bgr, device='cuda'):
    """Estimate SMPL pose and shape for the largest detected person in an image.

    Steps:
      1. Download (if needed) and load the default HMR2 checkpoint.
      2. Run the ViTDet cascade Mask R-CNN detector on ``img_bgr``.
      3. Keep detections of class 0 with score > 0.5 and pick the one with the
         largest bounding-box area.
      4. Run HMR2 on that crop and return its SMPL parameters.

    Args:
        img_bgr: BGR image (as produced by ``render_front``).
        device: Torch device string for the HMR2 model and the batch.

    Returns:
        dict with CPU tensors:
          'body_pose': (23, 3, 3) rotation matrices
          'betas':     (10,) shape coefficients

    Raises:
        RuntimeError: if no detection passes the class/score filter.
    """
    from pathlib import Path
    from hmr2.configs import CACHE_DIR_4DHUMANS
    from hmr2.models import load_hmr2, DEFAULT_CHECKPOINT, download_models
    from hmr2.utils import recursive_to
    from hmr2.datasets.vitdet_dataset import ViTDetDataset
    from hmr2.utils.utils_detectron2 import DefaultPredictor_Lazy
    from detectron2.config import LazyConfig
    import hmr2 as hmr2_pkg

    # --- HMR2 model -----------------------------------------------------
    download_models(CACHE_DIR_4DHUMANS)
    model, model_cfg = load_hmr2(DEFAULT_CHECKPOINT)
    model = model.to(device).eval()

    # --- Person detector (config shipped inside the hmr2 package) -------
    detector_cfg_file = (
        Path(hmr2_pkg.__file__).parent / 'configs' / 'cascade_mask_rcnn_vitdet_h_75ep.py'
    )
    det_cfg = LazyConfig.load(str(detector_cfg_file))
    det_cfg.train.init_checkpoint = (
        'https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h'
        '/f328730692/model_final_f05665.pkl'
    )
    # Lower the test-time score threshold on each of the three cascade stages.
    for stage in range(3):
        det_cfg.model.roi_heads.box_predictors[stage].test_score_thresh = 0.25
    detector = DefaultPredictor_Lazy(det_cfg)

    # --- Detect and keep the largest confident class-0 box --------------
    instances = detector(img_bgr)['instances']
    keep = (instances.pred_classes == 0) & (instances.scores > 0.5)
    boxes = instances.pred_boxes.tensor[keep].cpu().numpy()
    if len(boxes) == 0:
        raise RuntimeError('HMR2: no person detected in render')

    box_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    largest = box_areas.argmax()
    boxes = boxes[largest:largest + 1]  # slice keeps the (1, 4) shape

    # --- HMR2 inference on the single crop ------------------------------
    dataset = ViTDetDataset(model_cfg, img_bgr, boxes)
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=0)
    # Exactly one box was kept, so the loader yields a single batch.
    for batch in loader:
        batch = recursive_to(batch, device)
        with torch.no_grad():
            out = model(batch)

    smpl_params = out['pred_smpl_params']
    return {
        'body_pose': smpl_params['body_pose'][0].cpu(),  # (23, 3, 3)
        'betas': smpl_params['betas'][0].cpu(),          # (10,)
    }
106
+
107
+
108
+ # ---- Step 3: Read all data from rigged.glb -----------------------------------
109
+
110
def read_rigged_glb(rig_glb):
    """Parse a rigged GLB and return its mesh, skinning data and bind joints.

    Only the first primitive of the first mesh and the first skin are read.

    Args:
        rig_glb: Path to the binary glTF (.glb) file written by the rig step.

    Returns:
        dict with:
          verts  : (N, 3) float64 mesh vertices in rig world space
          j_idx  : (N, 4) int joint indices
          w_arr  : (N, 4) float64 skinning weights, rows normalised to sum to 1
                   (all-zero rows are left as zeros)
          J_bind : (24, 3) float64 T-pose joint world positions

    Raises:
        ValueError: if the file is not a GLB, its first chunk is not JSON, or
            the skin does not have exactly ``len(SMPL_PARENTS)`` joints.
    """
    with open(rig_glb, 'rb') as fh:
        raw = fh.read()

    # GLB layout: 12-byte header (magic, version, length), then chunks of
    # (uint32 length, uint32 type, payload). The JSON chunk comes first.
    if len(raw) < 20 or raw[:4] != b'glTF':
        raise ValueError('Not a binary glTF (GLB) file: %s' % rig_glb)
    ch_len, ch_type = struct.unpack_from('<II', raw, 12)
    if ch_type != 0x4E4F534A:  # ASCII "JSON"
        raise ValueError('GLB first chunk is not JSON: %s' % rig_glb)
    gltf = json.loads(raw[20:20 + ch_len])
    # Skip the 8-byte header of the following (binary) chunk; memoryview
    # avoids copying the whole payload.
    bin_data = memoryview(raw)[20 + ch_len + 8:]

    comp_count = {'SCALAR': 1, 'VEC2': 2, 'VEC3': 3, 'VEC4': 4, 'MAT4': 16}
    # glTF data is little-endian regardless of host byte order.
    comp_dtype = {
        5120: 'i1', 5121: 'u1',
        5122: '<i2', 5123: '<u2',
        5125: '<u4', 5126: '<f4',
    }

    def _read(acc_i):
        """Return accessor ``acc_i`` as a (count, components) array."""
        acc = gltf['accessors'][acc_i]
        bv = gltf['bufferViews'][acc['bufferView']]
        off = bv.get('byteOffset', 0) + acc.get('byteOffset', 0)
        cnt = acc['count']
        n = comp_count[acc['type']]
        dtype = np.dtype(comp_dtype[acc['componentType']])
        elem = n * dtype.itemsize
        stride = bv.get('byteStride') or elem
        if stride == elem:
            # Tightly packed: one contiguous run of elements.
            return np.frombuffer(bin_data[off:off + cnt * elem],
                                 dtype=dtype).reshape(cnt, n)
        # Interleaved buffer view: gather each element's bytes individually.
        byte_idx = (off + stride * np.arange(cnt)[:, None]
                    + np.arange(elem)[None, :])
        packed = np.frombuffer(bin_data, dtype=np.uint8)[byte_idx]
        return packed.view(dtype).reshape(cnt, n)

    prim = gltf['meshes'][0]['primitives'][0]['attributes']
    verts = _read(prim['POSITION']).astype(np.float64)   # (N, 3)
    j_idx = _read(prim['JOINTS_0']).astype(int)          # (N, 4)
    w_arr = _read(prim['WEIGHTS_0']).astype(np.float64)  # (N, 4)
    # Normalise weights per vertex; rows that sum to 0 are divided by 1 so
    # they stay all-zero instead of becoming NaN.
    row_sum = w_arr.sum(axis=1, keepdims=True)
    w_arr /= np.where(row_sum > 0, row_sum, 1.0)

    # Read T-pose joint world positions by accumulating node translations
    # down the SMPL kinematic tree.
    # NOTE(review): node rotations/scales are ignored and skin['joints'] is
    # assumed to be in SMPL joint order -- confirm against the rig exporter.
    nodes = gltf['nodes']
    skin = gltf['skins'][0]
    j_nodes = skin['joints']
    n_joints = len(SMPL_PARENTS)
    if len(j_nodes) != n_joints:
        raise ValueError('Expected %d skin joints, found %d in %s'
                         % (n_joints, len(j_nodes), rig_glb))
    J_bind = np.zeros((n_joints, 3), dtype=np.float64)
    for ji, ni in enumerate(j_nodes):
        t_local = np.array(nodes[ni].get('translation', [0, 0, 0]))
        p = SMPL_PARENTS[ji]
        J_bind[ji] = (J_bind[p] if p >= 0 else np.zeros(3)) + t_local

    print(' Rig verts: %d Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
        len(verts),
        verts[:, 1].min(), verts[:, 1].max(),
        verts[:, 0].min(), verts[:, 0].max()))
    print(' J_bind pelvis: (%.3f, %.3f, %.3f) L_shoulder: (%.3f, %.3f, %.3f)' % (
        *J_bind[0], *J_bind[16]))
    return {'verts': verts, 'j_idx': j_idx, 'w_arr': w_arr, 'J_bind': J_bind}
159
+
160
+
161
+ # ---- Step 4: FK in rig world space -> A matrices -----------------------------
162
+
163
+ _FLIP_X = np.diag([-1.0, 1.0, 1.0]) # X-axis mirror matrix
164
+
165
+
166
+ def _adapt_rotmat_to_flipped_x(R_smpl):
167
+ """
168
+ Convert an SO(3) rotation matrix from SMPL convention (left=+X)
169
+ to rig convention (left=-X). F @ R @ F where F = diag(-1,1,1).
170
+ """
171
+ return _FLIP_X @ R_smpl @ _FLIP_X
172
+
173
+
174
def compute_rig_fk_transforms(J_bind, body_pose_rotmats):
    """Build per-joint skinning matrices ``A_j = G_j_posed @ IBM_j``.

    ``A_j`` maps a T-pose point to its posed position in rig world space, so
    its inverse maps posed -> T-pose.

    HMR2 rotations follow the SMPL convention (left shoulder at +X) whereas
    the rig uses the mirrored one (left shoulder at -X); each rotation is
    therefore conjugated with the X-flip before forward kinematics.

    Args:
        J_bind: (24, 3) T-pose joint world positions from the rig.
        body_pose_rotmats: (23, 3, 3) torch tensor of rotation matrices for
            joints 1..23. The root (joint 0) is given the identity rotation.

    Returns:
        A: (24, 4, 4) float64 array of skinning transforms.
    """
    n_joints = 24

    # Forward kinematics: world transform of every posed joint. Parents
    # always precede children in SMPL_PARENTS, so one pass suffices.
    world = []
    for joint in range(n_joints):
        parent = SMPL_PARENTS[joint]

        if joint >= 1:
            rot_smpl = body_pose_rotmats[joint - 1].numpy()
        else:
            rot_smpl = np.eye(3)

        local = np.eye(4, dtype=np.float64)
        # Convert rotation from SMPL (+X=left) to rig (-X=left) convention.
        local[:3, :3] = _adapt_rotmat_to_flipped_x(rot_smpl)

        if parent < 0:
            # Root: translation is its absolute world position.
            local[:3, 3] = J_bind[joint]
            world.append(local)
        else:
            # Child: translation is the bind-pose offset from its parent.
            local[:3, 3] = J_bind[joint] - J_bind[parent]
            world.append(world[parent] @ local)

    G = np.stack(world)

    # Compose with the inverse bind matrix (pure translation by -J_bind[j]).
    A = np.zeros((n_joints, 4, 4), dtype=np.float64)
    for joint in range(n_joints):
        inv_bind = np.eye(4, dtype=np.float64)
        inv_bind[:3, 3] = -J_bind[joint]
        A[joint] = G[joint] @ inv_bind

    return A
214
+
215
+
216
+ # ---- Step 5: Inverse LBS -----------------------------------------------------
217
+
218
def inverse_lbs(verts, j_idx, w_arr, A):
    """Undo Linear Blend Skinning: ``v_tpose = (Sum_j W_j * A_j)^-1 * v_posed``.

    All inputs are expected in rig world space.

    Args:
        verts: (N, 3) posed vertex positions.
        j_idx: (N, K) joint indices per vertex (K = 4 for glTF JOINTS_0).
        w_arr: (N, K) skinning weights per vertex.
        A:     (J, 4, 4) per-joint skinning transforms (T-pose -> posed).

    Returns:
        (N, 3) T-posed vertices. Vertices with no weight above 1e-6 are
        returned unchanged, because their blended transform would be the
        zero matrix and cannot be inverted.
    """
    verts = np.asarray(verts)
    N = len(verts)
    if N == 0:
        # Nothing to transform; the min/max statistics below would fail.
        return np.zeros((0, 3), dtype=np.float64)

    # Blend forward transforms, skipping negligible weights. Masked-out
    # entries never index into A, so padding indices are harmless.
    T_fwd = np.zeros((N, 4, 4), dtype=np.float64)
    has_influence = np.zeros(N, dtype=bool)
    for k in range(w_arr.shape[1]):
        ji = j_idx[:, k]
        w = w_arr[:, k]
        mask = w > 1e-6
        if mask.any():
            T_fwd[mask] += w[mask, None, None] * A[ji[mask]]
            has_influence |= mask

    # Unskinned vertices: use identity so the batched inverse does not raise
    # LinAlgError for the whole mesh.
    unskinned = ~has_influence
    if unskinned.any():
        T_fwd[unskinned] = np.eye(4)
        print(' inverse LBS: %d vertices have no skinning influence; left in place'
              % int(unskinned.sum()))

    T_inv = np.linalg.inv(T_fwd)
    v_h = np.concatenate([verts, np.ones((N, 1))], axis=1)  # homogeneous
    v_tp = np.einsum('nij,nj->ni', T_inv, v_h)[:, :3]

    disp = np.linalg.norm(v_tp - verts, axis=1)
    print(' inverse LBS: mean_disp=%.4f max_disp=%.4f' % (disp.mean(), disp.max()))
    return v_tp
241
+
242
+
243
+ # ---- Step 6: Map T-posed rig verts back to original mesh space ---------------
244
+
245
def rig_to_original_space(rig_verts_tposed, rig_verts_original, orig_mesh_verts):
    """Map T-posed rig-space vertices back into the original mesh's space.

    The rig vertices are treated as a uniformly scaled and translated copy of
    the original mesh vertices::

        rig_vert = orig_vert * scale + offset

    ``scale`` is estimated from the ratio of the Y extents (heights). The Y
    offset aligns the lowest points (floors); the X and Z offsets align the
    bounding-box centres. The same mapping is then inverted for the T-posed
    vertices: ``orig_vert = (rig_vert - offset) / scale``.

    Args:
        rig_verts_tposed: (N, 3) T-posed vertices in rig space.
        rig_verts_original: (M, 3) posed rig vertices (reference for the fit).
        orig_mesh_verts: (M', 3) original mesh vertices (reference for the fit).

    Returns:
        (N, 3) array shaped and typed like ``rig_verts_tposed``.

    Raises:
        ValueError: if a reference set is empty or the rig has zero height,
            which would make the mapping non-invertible.
    """
    rig_ref = np.asarray(rig_verts_original)
    orig_ref = np.asarray(orig_mesh_verts)
    tposed = np.asarray(rig_verts_tposed)
    if rig_ref.size == 0 or orig_ref.size == 0:
        raise ValueError('rig_to_original_space: reference vertex set is empty')

    rig_lo, rig_hi = rig_ref.min(axis=0), rig_ref.max(axis=0)
    orig_lo, orig_hi = orig_ref.min(axis=0), orig_ref.max(axis=0)

    rig_h = rig_hi[1] - rig_lo[1]
    orig_h = orig_hi[1] - orig_lo[1]
    scale = rig_h / max(orig_h, 1e-6)
    if not np.isfinite(scale) or scale <= 0:
        # A flat rig would otherwise yield inf/NaN vertices silently.
        raise ValueError(
            'rig_to_original_space: degenerate rig height %r, cannot recover scale'
            % (rig_h,))

    # Anchor points matched between the two spaces: bounding-box centre in
    # X and Z, floor (minimum) in Y.
    rig_anchor = (rig_hi + rig_lo) * 0.5
    orig_anchor = (orig_hi + orig_lo) * 0.5
    rig_anchor[1] = rig_lo[1]
    orig_anchor[1] = orig_lo[1]
    offset = rig_anchor - orig_anchor * scale

    print(' rig->orig: scale=%.4f offset=[%.3f, %.3f, %.3f]' % (
        scale, offset[0], offset[1], offset[2]))

    # Invert: orig_vert = (rig_vert - offset) / scale.
    tposed_orig = np.zeros_like(tposed)
    tposed_orig[:, :3] = (tposed[:, :3] - offset[:3]) / scale
    return tposed_orig
284
+
285
+
286
+ # ---- SKEL bone geometry ------------------------------------------------------
287
+
288
def export_skel_bones(betas, out_path, gender='male'):
    """Export the SKEL skeleton mesh in the zero pose for the given shape.

    This is a best-effort step: every failure mode prints a message and
    returns ``None`` instead of raising.

    Args:
        betas: 1-D torch tensor of shape coefficients; only the first 10 are
            used.
        out_path: Destination path handed to ``trimesh``'s exporter.
        gender: Selects the ``skel_<gender>.pkl`` weights under ``SKEL_DIR``.

    Returns:
        ``out_path`` on success, ``None`` if SKEL is not installed, the
        weights file is missing, or the export fails.
    """
    # Optional dependency: bail out quietly when it is absent.
    try:
        from skel.skel_model import SKEL
    except ImportError:
        print(' [skel] Not installed')
        return None

    weights_path = os.path.join(SKEL_DIR, 'skel_%s.pkl' % gender)
    if not os.path.exists(weights_path):
        print(' [skel] Weights not found: %s' % weights_path)
        return None

    try:
        skel_model = SKEL(gender=gender, model_path=SKEL_DIR)
        shape = betas.unsqueeze(0)[:, :10]
        # All-zero pose (46 parameters) and translation.
        zero_pose = torch.zeros(1, 46)
        zero_trans = torch.zeros(1, 3)
        with torch.no_grad():
            result = skel_model(poses=zero_pose, betas=shape,
                                trans=zero_trans, skelmesh=True)
        bone_verts = result.skel_verts[0].numpy()
        bone_faces = skel_model.skel_f.numpy()
        bone_mesh = trimesh.Trimesh(vertices=bone_verts, faces=bone_faces,
                                    process=False)
        bone_mesh.export(out_path)
        print(' [skel] Bone mesh -> %s (%d verts)' % (out_path, len(bone_verts)))
        return out_path
    except Exception as e:
        # Deliberately broad: bone export is optional and must not abort
        # the main pipeline.
        print(' [skel] Export failed: %s' % e)
        return None
314
+
315
+
316
+ # ---- Main --------------------------------------------------------------------
317
+
318
def tpose_smpl(body_glb, out_glb, rig_glb=None, debug_dir=None, skel_out=None):
    """T-pose a textured humanoid GLB using its rig and an HMR2 pose estimate.

    Pipeline: render a front view, estimate the SMPL pose with HMR2, read the
    rigged mesh, build FK skinning transforms, apply inverse LBS, map the
    result back into the original mesh's coordinate space and export it with
    the original UVs and base-colour texture.

    Args:
        body_glb: Path to the textured body GLB (source of UVs and texture).
        out_glb: Output path for the T-posed GLB.
        rig_glb: Path to the rigged GLB from the rig step (required).
        debug_dir: Optional directory for the debug render; created if it
            does not exist.
        skel_out: Optional output path for the SKEL bone mesh.

    Returns:
        ``out_glb``.

    Raises:
        RuntimeError: if ``rig_glb`` is missing, or if the rig and body
            meshes do not have the same number of vertices.
    """
    device = 'cuda'

    if not rig_glb or not os.path.exists(rig_glb):
        raise RuntimeError('--rig is required: provide the rigged.glb from the Rig step.')

    print('[tpose_smpl] Rendering front view ...')
    img_bgr = render_front(body_glb, device=device)
    if debug_dir:
        # cv2.imwrite fails silently if the directory is missing, so make
        # sure it exists for programmatic callers too.
        os.makedirs(debug_dir, exist_ok=True)
        cv2.imwrite(os.path.join(debug_dir, 'tpose_render.png'), img_bgr)

    print('[tpose_smpl] Running HMR2 pose estimation ...')
    hmr2_out = run_hmr2(img_bgr, device=device)
    print(' betas: %s' % hmr2_out['betas'].numpy().round(3))

    print('[tpose_smpl] Reading rigged GLB (rig world space) ...')
    rig_data = read_rigged_glb(rig_glb)
    rig_verts = rig_data['verts']

    print('[tpose_smpl] Loading original mesh for UV/texture ...')
    scene = trimesh.load(body_glb)
    if isinstance(scene, trimesh.Scene):
        # Only the first geometry of the scene is processed.
        geom_name = list(scene.geometry.keys())[0]
        orig_mesh = scene.geometry[geom_name]
    else:
        orig_mesh = scene
        geom_name = None

    orig_verts = np.array(orig_mesh.vertices, dtype=np.float64)
    uvs = np.array(orig_mesh.visual.uv, dtype=np.float64)
    orig_tex = orig_mesh.visual.material.baseColorTexture
    print(' Orig mesh: %d verts Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
        len(orig_verts),
        orig_verts[:, 1].min(), orig_verts[:, 1].max(),
        orig_verts[:, 0].min(), orig_verts[:, 0].max()))

    # The T-posed rig vertices replace the body vertices one-for-one while
    # faces and UVs are reused, so the two meshes must line up.
    if len(rig_verts) != len(orig_verts):
        raise RuntimeError(
            'Rig mesh has %d vertices but body mesh has %d; both GLBs must '
            'share the same vertices in the same order.'
            % (len(rig_verts), len(orig_verts)))

    print('[tpose_smpl] Computing FK transforms in rig world space ...')
    body_pose_rotmats = hmr2_out['body_pose']  # (23, 3, 3)
    A = compute_rig_fk_transforms(rig_data['J_bind'], body_pose_rotmats)

    # Sanity check: with identity rotations every A_j is the identity, so
    # the blended transform of any vertex should be the identity too.
    A_zero = compute_rig_fk_transforms(rig_data['J_bind'],
                                       torch.zeros(23, 3, 3) + torch.eye(3))
    n_probe = min(3, len(rig_verts))
    if n_probe:
        T_fwd_test = np.zeros((n_probe, 4, 4))
        for k in range(4):
            ji = rig_data['j_idx'][:n_probe, k]
            w = rig_data['w_arr'][:n_probe, k]
            T_fwd_test += w[:, None, None] * A_zero[ji]
        identity_err = np.abs(T_fwd_test - np.eye(4)).max()
        print(' zero-pose identity check: max_err=%.6f (expect ~0)' % identity_err)

    print('[tpose_smpl] Applying inverse LBS ...')
    rig_verts_tposed = inverse_lbs(
        rig_verts, rig_data['j_idx'], rig_data['w_arr'], A)

    print('[tpose_smpl] T-posed rig verts: Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
        rig_verts_tposed[:, 1].min(), rig_verts_tposed[:, 1].max(),
        rig_verts_tposed[:, 0].min(), rig_verts_tposed[:, 0].max()))

    print('[tpose_smpl] Mapping back to original mesh coordinate space ...')
    tposed_orig = rig_to_original_space(
        rig_verts_tposed, rig_verts, orig_verts)

    print('[tpose_smpl] T-posed orig: Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
        tposed_orig[:, 1].min(), tposed_orig[:, 1].max(),
        tposed_orig[:, 0].min(), tposed_orig[:, 0].max()))

    # Swap in the new geometry and rebuild the visuals from the saved UVs
    # and texture.
    orig_mesh.vertices = tposed_orig
    orig_mesh.visual = TextureVisuals(
        uv=uvs, material=PBRMaterial(baseColorTexture=orig_tex))

    if geom_name and isinstance(scene, trimesh.Scene):
        scene.geometry[geom_name] = orig_mesh
        scene.export(out_glb)
    else:
        orig_mesh.export(out_glb)

    print('[tpose_smpl] Saved: %s (%d KB)' % (out_glb, os.path.getsize(out_glb) // 1024))

    if skel_out:
        print('[tpose_smpl] Exporting SKEL bone geometry ...')
        export_skel_bones(hmr2_out['betas'], skel_out)

    return out_glb
401
+
402
+
403
if __name__ == '__main__':
    # Command-line entry point: parse paths and run the T-posing pipeline.
    parser = argparse.ArgumentParser()
    parser.add_argument('--body', required=True)
    parser.add_argument('--out', required=True)
    parser.add_argument('--rig', required=True, help='Rigged GLB from rig step')
    parser.add_argument('--skel_out', default=None, help='SKEL BSM bone mesh output')
    parser.add_argument('--debug_dir', default=None)
    cli = parser.parse_args()

    # Create the debug directory up front when one was requested.
    if cli.debug_dir:
        os.makedirs(cli.debug_dir, exist_ok=True)

    tpose_smpl(
        cli.body,
        cli.out,
        rig_glb=cli.rig,
        debug_dir=cli.debug_dir,
        skel_out=cli.skel_out,
    )