Daankular committed on
Commit
6c18e94
·
verified ·
1 Parent(s): 6b843dc

Upload pipeline/face_inswap_bake.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. pipeline/face_inswap_bake.py +302 -0
pipeline/face_inswap_bake.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ face_inswap_bake.py — Proper face swap on rendered views, then UV-bake.
3
+
4
+ Pipeline:
5
+ 1. Render the mesh from multiple views (front + L/R 3-quarter)
6
+ 2. Run inswapper_128 to swap reference face onto each rendered view
7
+ 3. uv_render_attr() bakes each swapped render directly into UV texture
8
+ (render-space coords shared with UV lookup — no coordinate transforms)
9
+ 4. Composite multiple views (front takes priority, sides fill gaps)
10
+ 5. Save updated GLB
11
+
12
+ Usage:
13
+ python face_inswap_bake.py \
14
+ --body /tmp/triposg_textured.glb \
15
+ --face /tmp/triposg_face_ref.png \
16
+ --out /tmp/face_swapped.glb \
17
+ [--uv_size 4096] [--debug_dir /tmp]
18
+ """
19
+
20
+ import os, sys, argparse, warnings
21
+ warnings.filterwarnings('ignore')
22
+
23
+ import numpy as np
24
+ import cv2
25
+ import torch
26
+ import torch.nn.functional as F
27
+ from PIL import Image
28
+ import trimesh
29
+ from trimesh.visual.texture import TextureVisuals
30
+ from trimesh.visual.material import PBRMaterial
31
+
32
+ sys.path.insert(0, '/root/MV-Adapter')
33
+ from mvadapter.utils.mesh_utils import (
34
+ NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, render,
35
+ )
36
+ from mvadapter.utils.mesh_utils.uv import (
37
+ uv_precompute, uv_render_geometry, uv_render_attr,
38
+ )
39
+ from insightface.app import FaceAnalysis
40
+ import insightface
41
+ from gfpgan import GFPGANer
42
+
43
+
44
+ GFPGAN_PATH = '/root/MV-Adapter/checkpoints/GFPGANv1.4.pth'
45
+
46
+
47
+ # ── helpers ───────────────────────────────────────────────────────────────────
48
+
49
def _build_front_face_uv_mask(mesh_t, tex_H, tex_W, neck_frac=0.76):
    """Build a soft UV-space mask covering only front-facing head triangles.

    Vertices above ``neck_frac`` of the mesh's Y extent count as "head"; of
    those, the ones at or above the 40th percentile of Z count as "front".
    Triangles whose three vertices are all selected are filled into a UV-space
    image, which is then dilated twice, eroded once and Gaussian-blurred.

    Args:
        mesh_t: Mesh exposing ``vertices``, ``faces`` and ``visual.uv``.
        tex_H: Mask height in texels.
        tex_W: Mask width in texels.
        neck_frac: Fraction of the Y extent below which vertices are ignored.

    Returns:
        A ``(tex_H, tex_W)`` float32 array; all zeros when the mesh has no
        usable head vertices.
    """
    # NOTE(review): the selection assumes +Y is up and the face looks toward
    # +Z -- confirm against the orientation of the meshes fed to this script.
    verts = np.asarray(mesh_t.vertices, dtype=np.float64)
    faces = np.asarray(mesh_t.faces, dtype=np.int32)
    uvs = np.asarray(mesh_t.visual.uv, dtype=np.float64)

    geom_mask = np.zeros((tex_H, tex_W), dtype=np.float32)
    if verts.size == 0:
        # Nothing to select; min()/max() below would raise on an empty array.
        return geom_mask

    y_min, y_max = verts[:, 1].min(), verts[:, 1].max()
    neck_y = float(y_min + (y_max - y_min) * neck_frac)
    head_idx = np.where(verts[:, 1] > neck_y)[0]
    if head_idx.size == 0:
        # Flat mesh (y_min == y_max): no vertex lies strictly above the neck.
        return geom_mask
    hv = verts[head_idx]

    z_thresh = float(np.percentile(hv[:, 2], 40))
    front = hv[:, 2] >= z_thresh
    if front.sum() < 30:
        # Too few vertices survive the Z cut; fall back to the whole head.
        front = np.ones(len(hv), bool)

    face_vert_mask = np.zeros(len(verts), bool)
    face_vert_mask[head_idx[front]] = True
    # Keep a triangle only when all three of its vertices were selected.
    face_tri_mask = face_vert_mask[faces].all(axis=1)
    face_tris = faces[face_tri_mask]
    print(f' Geometry mask: {face_tri_mask.sum()} front-face triangles '
          f'(neck_y={neck_y:.3f}, z_thresh={z_thresh:.3f})')

    # UV -> pixel coordinates for every selected triangle at once. V is
    # flipped so that v=1 maps to the top image row.
    tri_uv = uvs[face_tris]                              # (T, 3, 2)
    px = tri_uv[..., 0] * tex_W
    py = (1.0 - tri_uv[..., 1]) * tex_H
    polys = np.stack([px, py], axis=-1).astype(np.int32)
    if len(polys):
        cv2.fillPoly(geom_mask, list(polys), 1.0)

    # Dilating more than eroding closes seams between UV triangles and grows
    # the mask slightly; the blur then feathers its edge.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    geom_mask = cv2.dilate(geom_mask, kernel, iterations=2)
    geom_mask = cv2.erode(geom_mask, kernel, iterations=1)
    geom_mask = cv2.GaussianBlur(geom_mask, (31, 31), 8)
    return geom_mask
88
+
89
+
90
def _detect_largest_face(img_bgr, app):
    """Return the detected face with the largest bounding-box area.

    Args:
        img_bgr: Image handed to ``app.get``; ``None`` (what ``cv2.imread``
            returns for an unreadable file) is treated as "no face".
        app: Detector exposing ``get(image)`` and returning a sequence of
            detections, each with a ``bbox`` of ``(x0, y0, x1, y1)``.

    Returns:
        The detection with the largest bbox area, or ``None`` when the image
        is ``None`` or nothing was detected.
    """
    if img_bgr is None:
        return None

    faces = app.get(img_bgr)
    if not faces:
        return None

    def _bbox_area(face):
        x0, y0, x1, y1 = face.bbox[:4]
        return (x1 - x0) * (y1 - y0)

    return max(faces, key=_bbox_area)
95
+
96
+
97
def _render_view(ctx, mesh_mv, uv_pre, azimuth_deg, H, W, device):
    """Build the camera for one azimuth and compute its UV geometry.

    No colour image is produced here; the caller renders the textured view
    separately with the returned camera.

    Args:
        ctx: Rasterisation context passed through to ``uv_render_geometry``.
        mesh_mv: Mesh passed through to ``uv_render_geometry``.
        uv_pre: Result of ``uv_precompute`` for the same mesh.
        azimuth_deg: Camera azimuth in degrees.
        H: View height in pixels.
        W: View width in pixels.
        device: Device on which the camera is created.

    Returns:
        ``(camera, uv_geom)``: the orthographic camera and the output of
        ``uv_render_geometry`` for that camera.
    """
    # Fixed framing: zero elevation, distance 1.8, and a 1.1 x 1.1
    # orthographic window centred on the origin.
    camera = get_orthogonal_camera(
        elevation_deg=[0], distance=[1.8],
        left=-0.55, right=0.55, bottom=-0.55, top=0.55,
        azimuth_deg=[azimuth_deg], device=device,
    )
    uv_geom = uv_render_geometry(
        ctx, mesh_mv, camera,
        view_height=H, view_width=W,
        uv_precompute_output=uv_pre,
        compute_depth_grad=False,
    )
    return camera, uv_geom
111
+
112
+
113
def face_inswap_bake(body_glb, face_img_path, out_glb,
                     uv_size=4096, debug_dir=None):
    """Swap a reference face onto rendered views of a mesh and bake it to UV.

    The textured GLB is rendered from three azimuths. Each render goes through
    inswapper_128 and GFPGAN, is projected into UV space with
    ``uv_render_attr``, and the views are merged as a weighted average before
    being alpha-blended over the original base-colour texture.

    Args:
        body_glb: Path to the textured input GLB.
        face_img_path: Path to the reference face image.
        out_glb: Destination path of the exported GLB.
        uv_size: Side length, in texels, of the square UV bake buffers.
        debug_dir: Optional directory for intermediate PNGs; created if it
            does not exist.

    Returns:
        ``out_glb``.

    Raises:
        RuntimeError: If the reference image cannot be read, no face is
            detected in it, or the mesh has no base-colour texture.
    """
    device = 'cuda'
    INSWAPPER_PATH = '/root/MV-Adapter/checkpoints/inswapper_128.onnx'

    # Fail fast on an unreadable reference image, before any model is loaded:
    # cv2.imread signals failure by returning None rather than raising.
    ref_bgr = cv2.imread(face_img_path)
    if ref_bgr is None:
        raise RuntimeError(f'Could not read reference image: {face_img_path}')

    if debug_dir:
        # cv2.imwrite returns False instead of raising when the target
        # directory is missing, which would silently drop the debug output.
        os.makedirs(debug_dir, exist_ok=True)

    # ── Load GFPGAN enhancer ─────────────────────────────────────────────────
    print('[fib] Loading GFPGANv1.4 ...')
    enhancer = GFPGANer(
        model_path=GFPGAN_PATH,
        upscale=1,
        arch='clean',
        channel_multiplier=2,
        bg_upsampler=None,
    )

    # ── Load mesh ────────────────────────────────────────────────────────────
    print(f'[fib] Loading mesh: {body_glb}')
    ctx = NVDiffRastContextWrapper(device=device, context_type='cuda')
    mesh_mv = load_mesh(body_glb, rescale=True, device=device)

    # Second load through trimesh: this copy supplies the UVs and the texture
    # and is the object that gets exported at the end.
    scene_t = trimesh.load(body_glb)
    if isinstance(scene_t, trimesh.Scene):
        # Only the first geometry of a scene is processed.
        geom_name = list(scene_t.geometry.keys())[0]
        mesh_t = scene_t.geometry[geom_name]
    else:
        mesh_t = scene_t
        geom_name = None

    base_tex = mesh_t.visual.material.baseColorTexture
    if base_tex is None:
        raise RuntimeError(f'Mesh has no base-colour texture: {body_glb}')
    if isinstance(base_tex, Image.Image):
        # The bake buffers are 3-channel; an RGBA or greyscale texture would
        # otherwise fail to broadcast in the blend below.
        base_tex = base_tex.convert('RGB')
    orig_tex_np = np.array(base_tex, dtype=np.float32) / 255.0
    uvs = np.array(mesh_t.visual.uv, dtype=np.float64)
    tex_H, tex_W = orig_tex_np.shape[:2]
    print(f' Texture: {tex_W}×{tex_H}')

    # Build geometry mask (front-face head triangles only) at UV resolution
    print('[fib] Building front-face geometry UV mask ...')
    geom_uv_mask = _build_front_face_uv_mask(mesh_t, uv_size, uv_size)

    # Render dimensions (match triposg_app.py)
    H_r, W_r = 1024, 768

    # ── Precompute UV geometry ───────────────────────────────────────────────
    print(f'[fib] Precomputing UV geometry ({uv_size}×{uv_size}) ...')
    uv_pre = uv_precompute(ctx, mesh_mv, height=uv_size, width=uv_size)

    # ── Load face swap model + face detector ─────────────────────────────────
    print('[fib] Loading inswapper_128 ...')
    swapper = insightface.model_zoo.get_model(
        INSWAPPER_PATH, download=False,
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )

    app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    ref_face = _detect_largest_face(ref_bgr, app)
    if ref_face is None:
        raise RuntimeError(f'No face detected in reference: {face_img_path}')
    print(f' Reference face detected: bbox={ref_face.bbox.astype(int).tolist()}')

    # ── Process each view ────────────────────────────────────────────────────
    # Azimuth convention from MV-Adapter: -90 = front-facing.
    # NOTE(review): the original comment called -60 "slight left" and -120
    # "slight right", while the names below say the opposite. The names only
    # appear in logs and debug filenames; confirm which labelling is correct.
    views = [
        ('front', -90, 1.0),            # (name, azimuth_deg, priority_weight)
        ('threequarter_r', -60, 0.7),
        ('threequarter_l', -120, 0.7),
    ]

    # Accumulators for weighted UV compositing
    uv_colour_acc = np.zeros((uv_size, uv_size, 3), dtype=np.float32)
    uv_weight_acc = np.zeros((uv_size, uv_size), dtype=np.float32)
    baked_views = 0

    for view_name, azimuth, weight in views:
        print(f'\n[fib] View: {view_name} (azimuth={azimuth}°)')

        # Camera + UV geometry for this view
        camera, uv_geom = _render_view(ctx, mesh_mv, uv_pre, azimuth, H_r, W_r, device)

        # Render the textured mesh with the same camera
        render_out = render(ctx, mesh_mv, camera, height=H_r, width=W_r,
                            render_attr=True, render_depth=False, render_normal=False,
                            attr_background=0.0)
        # render_out.attr: presumably (1, H, W, 3) float in [0, 1]
        rendered_np = (render_out.attr[0].cpu().numpy() * 255).clip(0, 255).astype(np.uint8)
        rendered_bgr = cv2.cvtColor(rendered_np, cv2.COLOR_RGB2BGR)

        if debug_dir:
            cv2.imwrite(os.path.join(debug_dir, f'fib_render_{view_name}.png'), rendered_bgr)

        # Detect face in this rendered view
        tgt_face = _detect_largest_face(rendered_bgr, app)
        if tgt_face is None:
            print(f' No face in {view_name} render — skipping')
            continue
        print(f' Target face: bbox={tgt_face.bbox.astype(int).tolist()}')

        # Swap face
        swapped_bgr = swapper.get(rendered_bgr.copy(), tgt_face, ref_face, paste_back=True)

        # Enhance face detail with GFPGAN; keep the raw swap if it returns
        # no restored image.
        _, _, enhanced_bgr = enhancer.enhance(
            swapped_bgr, has_aligned=False, only_center_face=False, paste_back=True)
        if enhanced_bgr is not None:
            swapped_bgr = enhanced_bgr
            print(' GFPGAN enhanced')

        if debug_dir:
            cv2.imwrite(os.path.join(debug_dir, f'fib_swapped_{view_name}.png'), swapped_bgr)

        swapped_rgb = cv2.cvtColor(swapped_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # Render-space face mask: convex hull of the detector keypoints,
        # scaled 3.5x about its centroid, then feathered.
        kps = tgt_face.kps
        hull_pts = cv2.convexHull(kps.astype(np.float32)).squeeze(1)
        hull_centre = hull_pts.mean(axis=0)
        hull_exp = (hull_pts - hull_centre) * 3.5 + hull_centre
        face_mask = np.zeros((H_r, W_r), dtype=np.float32)
        cv2.fillPoly(face_mask, [hull_exp.astype(np.int32)], 1.0)
        face_mask = cv2.GaussianBlur(face_mask, (61, 61), 20)

        # Bake swapped render into UV space
        swapped_t = torch.tensor(swapped_rgb, device=device).unsqueeze(0)  # (1, H, W, 3)
        mask_t = torch.tensor(face_mask[None], device=device)

        uv_out = uv_render_attr(
            images=swapped_t,
            masks=mask_t,
            uv_render_geometry_output=uv_geom,
        )
        uv_img = uv_out.uv_attr_proj[0].cpu().numpy()   # presumably (uv, uv, 3)
        uv_mask = uv_out.uv_mask_proj[0].cpu().numpy()  # presumably (uv, uv)

        # Kill back-of-head UV islands
        uv_mask = uv_mask * geom_uv_mask

        # Weighted accumulate
        w = uv_mask * weight
        uv_colour_acc += uv_img * w[..., None]
        uv_weight_acc += w
        baked_views += 1
        print(f' Painted texels: {(uv_mask > 0.05).sum()}')

    if baked_views == 0:
        print('[fib] WARNING: no face found in any rendered view; '
              'texture will be exported unchanged')

    # ── Composite ────────────────────────────────────────────────────────────
    print('\n[fib] Compositing views ...')
    valid = uv_weight_acc > 0.01
    # Fallback colour for unpainted texels: the original texture resampled to
    # the bake resolution. Cropping it would sample the wrong UV region and
    # would not broadcast when uv_size exceeds the texture or it is non-square.
    if (tex_H, tex_W) == (uv_size, uv_size):
        orig_at_uv = orig_tex_np
    else:
        orig_at_uv = cv2.resize(orig_tex_np, (uv_size, uv_size),
                                interpolation=cv2.INTER_LINEAR)
    uv_final = np.where(valid[..., None],
                        uv_colour_acc / np.maximum(uv_weight_acc[..., None], 1e-6),
                        orig_at_uv)

    # Resize to texture resolution if needed
    if uv_size != tex_H or uv_size != tex_W:
        uv_final_rs = cv2.resize(uv_final, (tex_W, tex_H), interpolation=cv2.INTER_LINEAR)
        weight_rs = cv2.resize(uv_weight_acc, (tex_W, tex_H), interpolation=cv2.INTER_LINEAR)
    else:
        uv_final_rs = uv_final
        weight_rs = uv_weight_acc

    # Blend with original texture: face-swap result where painted, orig elsewhere
    alpha = np.clip(weight_rs, 0, 1)[..., None]
    new_tex = uv_final_rs * alpha + orig_tex_np * (1.0 - alpha)
    print(f' Total painted texels (tex res): {(weight_rs > 0.05).sum()}')

    if debug_dir:
        Image.fromarray((uv_final_rs * 255).clip(0, 255).astype(np.uint8)).save(
            os.path.join(debug_dir, 'fib_uv_composite.png'))

    # ── Save GLB ─────────────────────────────────────────────────────────────
    new_pil = Image.fromarray((new_tex * 255).clip(0, 255).astype(np.uint8))
    # NOTE(review): a fresh PBRMaterial is built from the base-colour texture
    # only; no other property of the original material is carried over here.
    mesh_t.visual = TextureVisuals(uv=uvs, material=PBRMaterial(baseColorTexture=new_pil))

    if geom_name and isinstance(scene_t, trimesh.Scene):
        scene_t.geometry[geom_name] = mesh_t
        scene_t.export(out_glb)
    else:
        mesh_t.export(out_glb)

    print(f'[fib] Saved: {out_glb} ({os.path.getsize(out_glb)//1024} KB)')
    return out_glb
291
+
292
+
293
if __name__ == '__main__':
    # Command-line entry point: parse the paths and options, then run the bake.
    ap = argparse.ArgumentParser(
        description='Swap a reference face onto rendered views of a textured '
                    'GLB and bake the result into its UV texture.')
    ap.add_argument('--body', required=True,
                    help='path to the textured input GLB')
    ap.add_argument('--face', required=True,
                    help='path to the reference face image')
    ap.add_argument('--out', required=True,
                    help='path of the GLB to write')
    ap.add_argument('--uv_size', type=int, default=4096,
                    help='side length of the square UV bake buffers, in texels')
    ap.add_argument('--debug_dir', default=None,
                    help='directory that receives intermediate debug PNGs')
    args = ap.parse_args()
    face_inswap_bake(args.body, args.face, args.out,
                     uv_size=args.uv_size, debug_dir=args.debug_dir)