Daankular commited on
Commit
ffdf67b
Β·
verified Β·
1 Parent(s): a906825

Upload pipeline/tpose.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. pipeline/tpose.py +332 -0
pipeline/tpose.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tpose.py β€” T-pose a humanoid GLB using YOLO pose estimation.
3
+
4
+ Pipeline:
5
+ 1. Render the mesh from front view (azimuth=-90)
6
+ 2. Run YOLOv8-pose to get 17 COCO keypoints in render-space
7
+ 3. Unproject keypoints through the orthographic camera to 3D
8
+ 4. Build Blender armature with bones at detected 3D joint positions (current pose)
9
+ 5. Auto-weight skin the mesh to this armature
10
+ 6. Rotate arm/leg bones to T-pose, apply deformation, export
11
+
12
+ Usage:
13
+ blender --background --python tpose.py -- <input.glb> <output.glb>
14
+ """
15
+
16
+ import bpy, sys, math, mathutils, os, tempfile
17
+ import numpy as np
18
+
19
+ # ── Args ─────────────────────────────────────────────────────────────────────
20
+ argv = sys.argv
21
+ argv = argv[argv.index("--") + 1:] if "--" in argv else []
22
+ if len(argv) < 2:
23
+ print("Usage: blender --background --python tpose.py -- input.glb output.glb")
24
+ sys.exit(1)
25
+ input_glb = argv[0]
26
+ output_glb = argv[1]
27
+
28
+ # ── Step 1: Render front view using nvdiffrast (outside Blender) ───────────────
29
+ # We do this via a subprocess call before Blender scene setup,
30
+ # using the triposg Python env which has MV-Adapter + nvdiffrast.
31
+ import subprocess, json
32
+
33
+ TRIPOSG_PYTHON = '/root/miniconda/envs/triposg/bin/python'
34
+ RENDER_SCRIPT = '/tmp/_tpose_render.py'
35
+ RENDER_OUT = '/tmp/_tpose_front.png'
36
+ KP_OUT = '/tmp/_tpose_kp.json'
37
+
38
+ render_code = r"""
39
+ import sys, json
40
+ sys.path.insert(0, '/root/MV-Adapter')
41
+ import numpy as np, cv2, torch
42
+ from mvadapter.utils.mesh_utils import (
43
+ NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, render,
44
+ )
45
+
46
+ body_glb = sys.argv[1]
47
+ out_png = sys.argv[2]
48
+
49
+ device = 'cuda'
50
+ ctx = NVDiffRastContextWrapper(device=device, context_type='cuda')
51
+ mesh_mv = load_mesh(body_glb, rescale=True, device=device)
52
+ camera = get_orthogonal_camera(
53
+ elevation_deg=[0], distance=[1.8],
54
+ left=-0.55, right=0.55, bottom=-0.55, top=0.55,
55
+ azimuth_deg=[-90], device=device,
56
+ )
57
+ out = render(ctx, mesh_mv, camera, height=1024, width=768,
58
+ render_attr=True, render_depth=False, render_normal=False,
59
+ attr_background=0.5)
60
+ img_np = (out.attr[0].cpu().numpy() * 255).clip(0,255).astype('uint8')
61
+ cv2.imwrite(out_png, cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR))
62
+ print(f"Rendered to {out_png}")
63
+ """
64
+
65
+ with open(RENDER_SCRIPT, 'w') as f:
66
+ f.write(render_code)
67
+
68
+ print("[tpose] Rendering front view ...")
69
+ r = subprocess.run([TRIPOSG_PYTHON, RENDER_SCRIPT, input_glb, RENDER_OUT],
70
+ capture_output=True, text=True)
71
+ print(r.stdout.strip()); print(r.stderr[-500:] if r.stderr else '')
72
+
73
+ # ── Step 2: YOLO pose estimation ──────────────────────────────────────────────
74
+ YOLO_SCRIPT = '/tmp/_tpose_yolo.py'
75
+ yolo_code = r"""
76
+ import sys, json
77
+ import cv2
78
+ from ultralytics import YOLO
79
+ import numpy as np
80
+
81
+ img_path = sys.argv[1]
82
+ kp_path = sys.argv[2]
83
+
84
+ model = YOLO('yolov8n-pose.pt')
85
+ img = cv2.imread(img_path)
86
+ H, W = img.shape[:2]
87
+
88
+ results = model(img, verbose=False)
89
+ if not results or results[0].keypoints is None:
90
+ print("ERROR: no person detected"); sys.exit(1)
91
+
92
+ # Pick detection with highest confidence
93
+ kps_all = results[0].keypoints.data.cpu().numpy() # (N, 17, 3)
94
+ confs = kps_all[:, :, 2].mean(axis=1)
95
+ best = kps_all[confs.argmax()] # (17, 3): x, y, conf
96
+
97
+ # COCO 17 keypoints:
98
+ # 0=nose 1=left_eye 2=right_eye 3=left_ear 4=right_ear
99
+ # 5=left_shoulder 6=right_shoulder 7=left_elbow 8=right_elbow
100
+ # 9=left_wrist 10=right_wrist 11=left_hip 12=right_hip
101
+ # 13=left_knee 14=right_knee 15=left_ankle 16=right_ankle
102
+
103
+ names = ['nose','left_eye','right_eye','left_ear','right_ear',
104
+ 'left_shoulder','right_shoulder','left_elbow','right_elbow',
105
+ 'left_wrist','right_wrist','left_hip','right_hip',
106
+ 'left_knee','right_knee','left_ankle','right_ankle']
107
+
108
+ kp_dict = {}
109
+ for i, name in enumerate(names):
110
+ x, y, c = best[i]
111
+ kp_dict[name] = {'x': float(x)/W, 'y': float(y)/H, 'conf': float(c)}
112
+ print(f" {name}: ({x:.1f},{y:.1f}) conf={c:.2f}")
113
+
114
+ kp_dict['img_hw'] = [int(H), int(W)]
115
+ with open(kp_path, 'w') as f:
116
+ json.dump(kp_dict, f)
117
+ print(f"Keypoints saved to {kp_path}")
118
+ """
119
+
120
+ with open(YOLO_SCRIPT, 'w') as f:
121
+ f.write(yolo_code)
122
+
123
+ print("[tpose] Running YOLO pose estimation ...")
124
+ r2 = subprocess.run([TRIPOSG_PYTHON, YOLO_SCRIPT, RENDER_OUT, KP_OUT],
125
+ capture_output=True, text=True)
126
+ print(r2.stdout.strip()); print(r2.stderr[-300:] if r2.stderr else '')
127
+
128
+ if not os.path.exists(KP_OUT):
129
+ print("ERROR: YOLO failed β€” falling back to heuristic")
130
+ kp_data = None
131
+ else:
132
+ with open(KP_OUT) as f:
133
+ kp_data = json.load(f)
134
+
135
+ # ── Step 3: Unproject render-space keypoints to 3D ────────────────────────────
136
+ # Orthographic camera: left=-0.55, right=0.55, bottom=-0.55, top=0.55
137
+ # Render: 768Γ—1024. NDC x = 2*(px/W)-1, ndc y = 1-2*(py/H)
138
+ # World X = ndc_x * 0.55, World Y (mesh up) = ndc_y * 0.55
139
+ # We need 3D positions in the ORIGINAL mesh coordinate space.
140
+ # After Blender GLB import, original mesh Y β†’ Blender Z, original Z β†’ Blender -Y
141
+
142
+ def kp_to_3d(name, z_default=0.0):
143
+ """Convert YOLO keypoint (image fraction) β†’ Blender 3D coords."""
144
+ if kp_data is None or name not in kp_data:
145
+ return None
146
+ k = kp_data[name]
147
+ if k['conf'] < 0.3:
148
+ return None
149
+ # Image coords (fractions) β†’ NDC
150
+ ndc_x = 2 * k['x'] - 1.0 # left→right = mesh X
151
+ ndc_y = -(2 * k['y'] - 1.0) # top→bottom = mesh Y (up)
152
+ # Orthographic: frustum Β±0.55
153
+ mesh_x = ndc_x * 0.55
154
+ mesh_y = ndc_y * 0.55 # this is mesh-space Y (vertical)
155
+ # After GLB import: mesh Y β†’ Blender Z, mesh Z β†’ Blender -Y
156
+ bl_x = mesh_x
157
+ bl_z = mesh_y # height
158
+ bl_y = z_default # depth (not observable from front view)
159
+ return (bl_x, bl_y, bl_z)
160
+
161
+ # Key joint positions in Blender space
162
+ J = {}
163
+ for name in ['nose','left_shoulder','right_shoulder','left_elbow','right_elbow',
164
+ 'left_wrist','right_wrist','left_hip','right_hip',
165
+ 'left_knee','right_knee','left_ankle','right_ankle']:
166
+ p = kp_to_3d(name)
167
+ if p: J[name] = p
168
+
169
+ print(f"[tpose] Detected joints: {list(J.keys())}")
170
+
171
+ # ── Step 4: Set up Blender scene ──────────────────────────────────────────────
172
+ bpy.ops.wm.read_factory_settings(use_empty=True)
173
+ bpy.ops.import_scene.gltf(filepath=input_glb)
174
+ bpy.context.view_layer.update()
175
+
176
+ mesh_obj = next((o for o in bpy.data.objects if o.type == 'MESH'), None)
177
+ if not mesh_obj:
178
+ print("ERROR: no mesh"); sys.exit(1)
179
+
180
+ verts_w = np.array([mesh_obj.matrix_world @ v.co for v in mesh_obj.data.vertices])
181
+ z_min, z_max = verts_w[:,2].min(), verts_w[:,2].max()
182
+ x_c = (verts_w[:,0].min() + verts_w[:,0].max()) / 2
183
+ y_c = (verts_w[:,1].min() + verts_w[:,1].max()) / 2
184
+ H_mesh = z_max - z_min
185
+
186
+ def zh(frac): return z_min + frac * H_mesh
187
+ def jv(name, fallback_frac=None, fallback_x=0.0):
188
+ """Get joint position from YOLO or use fallback."""
189
+ if name in J:
190
+ x, y, z = J[name]
191
+ return (x, y_c, z) # use mesh y_c for depth
192
+ if fallback_frac is not None:
193
+ return (x_c + fallback_x, y_c, zh(fallback_frac))
194
+ return None
195
+
196
+ # ── Step 5: Build armature in CURRENT pose ────────────────────────────────────
197
+ bpy.ops.object.armature_add(location=(x_c, y_c, zh(0.5)))
198
+ arm_obj = bpy.context.object
199
+ arm_obj.name = 'PoseRig'
200
+ arm = arm_obj.data
201
+
202
+ bpy.ops.object.mode_set(mode='EDIT')
203
+ eb = arm.edit_bones
204
+
205
+ def V(xyz): return mathutils.Vector(xyz)
206
+
207
+ def add_bone(name, head, tail, parent=None, connect=False):
208
+ b = eb.new(name)
209
+ b.head = V(head)
210
+ b.tail = V(tail)
211
+ if parent and parent in eb:
212
+ b.parent = eb[parent]
213
+ b.use_connect = connect
214
+ return b
215
+
216
+ # Helper: midpoint
217
+ def mid(a, b): return tuple((a[i]+b[i])/2 for i in range(3))
218
+ def offset(p, dx=0, dy=0, dz=0): return (p[0]+dx, p[1]+dy, p[2]+dz)
219
+
220
+ # ── Spine / hips ─────────────────────────────────────────────────────────────
221
+ hip_L = jv('left_hip', 0.48, -0.07)
222
+ hip_R = jv('right_hip', 0.48, 0.07)
223
+ sh_L = jv('left_shoulder', 0.77, -0.20)
224
+ sh_R = jv('right_shoulder', 0.77, 0.20)
225
+ nose = jv('nose', 0.92)
226
+
227
+ hips_c = mid(hip_L, hip_R) if (hip_L and hip_R) else (x_c, y_c, zh(0.48))
228
+ sh_c = mid(sh_L, sh_R) if (sh_L and sh_R) else (x_c, y_c, zh(0.77))
229
+
230
+ add_bone('Hips', hips_c, offset(hips_c, dz=H_mesh*0.08))
231
+ add_bone('Spine', hips_c, offset(hips_c, dz=(sh_c[2]-hips_c[2])*0.5), 'Hips')
232
+ add_bone('Chest', offset(hips_c, dz=(sh_c[2]-hips_c[2])*0.5), sh_c, 'Spine', True)
233
+ if nose:
234
+ neck_z = sh_c[2] + (nose[2]-sh_c[2])*0.35
235
+ head_z = sh_c[2] + (nose[2]-sh_c[2])*0.65
236
+ add_bone('Neck', (x_c, y_c, neck_z), (x_c, y_c, head_z), 'Chest')
237
+ add_bone('Head', (x_c, y_c, head_z), (x_c, y_c, nose[2]+H_mesh*0.05), 'Neck', True)
238
+ else:
239
+ add_bone('Neck', sh_c, offset(sh_c, dz=H_mesh*0.06), 'Chest')
240
+ add_bone('Head', offset(sh_c, dz=H_mesh*0.06), offset(sh_c, dz=H_mesh*0.14), 'Neck', True)
241
+
242
+ # ── Arms (placed at DETECTED current pose positions) ─────────────────────────
243
+ el_L = jv('left_elbow', 0.60, -0.30)
244
+ el_R = jv('right_elbow', 0.60, 0.30)
245
+ wr_L = jv('left_wrist', 0.45, -0.25)
246
+ wr_R = jv('right_wrist', 0.45, 0.25)
247
+
248
+ for side, sh, el, wr in (('L', sh_L, el_L, wr_L), ('R', sh_R, el_R, wr_R)):
249
+ if not sh: continue
250
+ el_pos = el if el else offset(sh, dz=-H_mesh*0.15)
251
+ wr_pos = wr if wr else offset(el_pos, dz=-H_mesh*0.15)
252
+ hand = offset(wr_pos, dz=(wr_pos[2]-el_pos[2])*0.4)
253
+ add_bone(f'UpperArm.{side}', sh, el_pos, 'Chest')
254
+ add_bone(f'ForeArm.{side}', el_pos, wr_pos, f'UpperArm.{side}', True)
255
+ add_bone(f'Hand.{side}', wr_pos, hand, f'ForeArm.{side}', True)
256
+
257
+ # ── Legs ─────────────────────────────────────────────────────────────────────
258
+ kn_L = jv('left_knee', 0.25, -0.07)
259
+ kn_R = jv('right_knee', 0.25, 0.07)
260
+ an_L = jv('left_ankle', 0.04, -0.06)
261
+ an_R = jv('right_ankle', 0.04, 0.06)
262
+
263
+ for side, hp, kn, an in (('L', hip_L, kn_L, an_L), ('R', hip_R, kn_R, an_R)):
264
+ if not hp: continue
265
+ kn_pos = kn if kn else offset(hp, dz=-H_mesh*0.23)
266
+ an_pos = an if an else offset(kn_pos, dz=-H_mesh*0.22)
267
+ toe = offset(an_pos, dy=-H_mesh*0.06, dz=-H_mesh*0.02)
268
+ add_bone(f'UpperLeg.{side}', hp, kn_pos, 'Hips')
269
+ add_bone(f'LowerLeg.{side}', kn_pos, an_pos, f'UpperLeg.{side}', True)
270
+ add_bone(f'Foot.{side}', an_pos, toe, f'LowerLeg.{side}', True)
271
+
272
+ bpy.ops.object.mode_set(mode='OBJECT')
273
+
274
+ # ── Step 6: Skin mesh to armature ────────────────────────────────────────────
275
+ bpy.context.view_layer.objects.active = arm_obj
276
+ mesh_obj.select_set(True)
277
+ arm_obj.select_set(True)
278
+ bpy.ops.object.parent_set(type='ARMATURE_AUTO')
279
+ print("[tpose] Auto-weights applied")
280
+
281
+ # ── Step 7: Pose arms to T-pose ───────────────────────────────────────────────
282
+ # Compute per-arm rotation: from (current elbow - shoulder) direction β†’ horizontal Β±X
283
+ bpy.context.view_layer.objects.active = arm_obj
284
+ bpy.ops.object.mode_set(mode='POSE')
285
+
286
+ pb = arm_obj.pose.bones
287
+
288
+ def set_tpose_arm(side, sh_pos, el_pos):
289
+ if not sh_pos or not el_pos:
290
+ return
291
+ if f'UpperArm.{side}' not in pb:
292
+ return
293
+ # Current upper-arm direction in armature local space
294
+ sx = -1 if side == 'L' else 1
295
+ # T-pose direction: Β±X horizontal
296
+ tpose_dir = mathutils.Vector((sx, 0, 0))
297
+ # Current bone direction (head→tail) in world space
298
+ bone = arm_obj.data.bones[f'UpperArm.{side}']
299
+ cur_dir = (bone.tail_local - bone.head_local).normalized()
300
+ # Rotation needed in bone's local space
301
+ rot_quat = cur_dir.rotation_difference(tpose_dir)
302
+ pb[f'UpperArm.{side}'].rotation_mode = 'QUATERNION'
303
+ pb[f'UpperArm.{side}'].rotation_quaternion = rot_quat
304
+
305
+ # Straighten forearm along the same axis
306
+ if f'ForeArm.{side}' in pb:
307
+ pb[f'ForeArm.{side}'].rotation_mode = 'QUATERNION'
308
+ pb[f'ForeArm.{side}'].rotation_quaternion = mathutils.Quaternion((1,0,0,0))
309
+
310
+ set_tpose_arm('L', sh_L, el_L)
311
+ set_tpose_arm('R', sh_R, el_R)
312
+
313
+ bpy.context.view_layer.update()
314
+ bpy.ops.object.mode_set(mode='OBJECT')
315
+
316
+ # ── Step 8: Apply armature modifier ──────────────────────────────────────────
317
+ bpy.context.view_layer.objects.active = mesh_obj
318
+ mesh_obj.select_set(True)
319
+ for mod in mesh_obj.modifiers:
320
+ if mod.type == 'ARMATURE':
321
+ bpy.ops.object.modifier_apply(modifier=mod.name)
322
+ print(f"[tpose] Applied modifier: {mod.name}")
323
+ break
324
+
325
+ bpy.data.objects.remove(arm_obj, do_unlink=True)
326
+
327
+ # ── Step 9: Export ────────────────────────────────────────────────────────────
328
+ bpy.ops.export_scene.gltf(
329
+ filepath=output_glb, export_format='GLB',
330
+ export_texcoords=True, export_normals=True,
331
+ export_materials='EXPORT', use_selection=False)
332
+ print(f"[tpose] Done β†’ {output_glb}")