"""MeshForge: Gradio app that turns a portrait photo into a rigged 3D avatar.

Pipeline, run in a single pass by ``generate_avatar``:

1. TripoSG   - segment the portrait and generate an untextured mesh.
2. MV-Adapter - generate six views and bake them into a texture.
3. UniRig    - skeleton + skinning (best effort; skipped on failure).
4. PSHuman   - face reconstruction and transplant (best effort).

Importing this module clones the required repositories, downloads the
checkpoints, loads the models and launches the Gradio UI.
"""

# Kept as the very first import, as in the original file.
# NOTE(review): presumably ZeroGPU wants `spaces` imported before torch - confirm.
import spaces

import os
import random
import shutil
import subprocess
import sys
import tempfile
import time
import urllib.request

import gradio as gr
import numpy as np
import torch
import trimesh
from huggingface_hub import hf_hub_download, snapshot_download
from PIL import Image
from torchvision import transforms
from transformers import AutoModelForImageSegmentation

# install extras
# Argument list + `sys.executable -m pip` instead of a shell string, so the
# package lands in the interpreter that is actually running this script.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "spandrel==0.4.1", "--no-deps"],
    check=True,
)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16
MAX_SEED = np.iinfo(np.int32).max

# ── Repos ─────────────────────────────────────────────────────────────────────
ROOT = os.path.dirname(os.path.abspath(__file__))
TRIPOSG_DIR = os.path.join(ROOT, "triposg")
MV_DIR = os.path.join(ROOT, "mv_adapter")
UNIRIG_DIR = os.path.join(ROOT, "unirig")
PSHUMAN_DIR = os.path.join(ROOT, "pshuman")
TMP_DIR = os.path.join(ROOT, "tmp")
os.makedirs(TMP_DIR, exist_ok=True)
CKPT_DIR = os.path.join(ROOT, "checkpoints")
os.makedirs(CKPT_DIR, exist_ok=True)

TEXTURE_EXT_NAME = "texture.cpython-310-x86_64-linux-gnu.so"
TEXTURE_EXT_URL = (
    f"https://huggingface.co/spaces/VAST-AI/TripoSG/resolve/main/{TEXTURE_EXT_NAME}"
)
BIG_LAMA_URL = (
    "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt"
)

# Azimuths of the six views; shared by the control-image render and the
# texture bake so the two stages cannot drift apart.
CAMERA_AZIMUTH_DEG = [x - 90 for x in [0, 90, 180, 270, 180, 180]]

# Location checked when UniRig's merge step did not write the requested output.
UNIRIG_FALLBACK_OUTPUT = "/tmp/rig_out/rigged.glb"


def _clone_repo(url, dest, *, commit=None, required=True):
    """Clone *url* into *dest* unless *dest* already exists.

    Args:
        url: Git URL to clone.
        dest: Target directory; when it exists nothing is done (no checkout).
        commit: Optional revision to check out after cloning.
        required: When true a failure raises; otherwise it is only logged.

    Raises:
        RuntimeError: If the clone/checkout fails and *required* is true.
    """
    if os.path.exists(dest):
        return
    try:
        subprocess.run(["git", "clone", url, dest], check=True)
        if commit is not None:
            subprocess.run(["git", "checkout", commit], cwd=dest, check=True)
    except (subprocess.CalledProcessError, OSError) as exc:
        if required:
            raise RuntimeError(f"failed to clone {url}") from exc
        # The stages using optional repos are already best-effort at run time.
        print(f"[startup] optional repo {url} unavailable: {exc}")


def _download_file(url: str, dest: str) -> str:
    """Download *url* to *dest* atomically and return *dest*.

    The payload is written to a temporary file in the destination directory
    and moved into place only after the transfer finished, so an interrupted
    download never leaves a truncated file that later passes an
    ``os.path.exists`` check.
    """
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(dest), suffix=".part")
    os.close(fd)
    try:
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, dest)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return dest


_clone_repo("https://github.com/VAST-AI-Research/TripoSG.git", TRIPOSG_DIR)
_clone_repo(
    "https://github.com/huanngzh/MV-Adapter.git",
    MV_DIR,
    commit="7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d",
)
_clone_repo(
    "https://github.com/VAST-AI-Research/UniRig.git", UNIRIG_DIR, required=False
)
_clone_repo("https://github.com/pengHTYX/PSHuman.git", PSHUMAN_DIR, required=False)

for p in [
    TRIPOSG_DIR,
    os.path.join(TRIPOSG_DIR, "scripts"),
    MV_DIR,
    os.path.join(MV_DIR, "scripts"),
]:
    if p not in sys.path:
        sys.path.insert(0, p)


def ensure_texture_extension() -> str:
    """Return the path of the prebuilt texture extension, downloading it if absent."""
    texture_ext_path = os.path.join(ROOT, TEXTURE_EXT_NAME)
    if os.path.exists(texture_ext_path):
        return texture_ext_path
    print(f"[startup] downloading {TEXTURE_EXT_NAME}...")
    return _download_file(TEXTURE_EXT_URL, texture_ext_path)


# ── Models ────────────────────────────────────────────────────────────────────
RMBG_CKPT = os.path.join(CKPT_DIR, "RMBG-1.4")
TRIPOSG_CKPT = os.path.join(CKPT_DIR, "TripoSG")
PSHUMAN_CKPT = os.path.join(CKPT_DIR, "PSHuman")

snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_CKPT)
snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_CKPT)
snapshot_download("pengHTYX/PSHuman_Unclip_768_6views", local_dir=PSHUMAN_CKPT)

if not os.path.exists(os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth")):
    hf_hub_download(
        "dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir=CKPT_DIR
    )
if not os.path.exists(os.path.join(CKPT_DIR, "big-lama.pt")):
    _download_file(BIG_LAMA_URL, os.path.join(CKPT_DIR, "big-lama.pt"))

# The imports below resolve through the sys.path entries added above, so they
# have to stay after the repositories were cloned.
from image_process import prepare_image
from briarmbg import BriaRMBG

rmbg_net = BriaRMBG.from_pretrained(RMBG_CKPT).to(DEVICE).eval()

from triposg.pipelines.pipeline_triposg import TripoSGPipeline

triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_CKPT).to(DEVICE, DTYPE)

NUM_VIEWS = 6

from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
from mvadapter.utils import get_orthogonal_camera, make_image_grid
from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render

mv_pipe = prepare_pipeline(
    base_model="stabilityai/stable-diffusion-xl-base-1.0",
    vae_model="madebyollin/sdxl-vae-fp16-fix",
    unet_model=None,
    lora_model=None,
    adapter_path="huanngzh/mv-adapter",
    scheduler=None,
    num_views=NUM_VIEWS,
    device=DEVICE,
    dtype=DTYPE,
)

birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
).to(DEVICE)
transform_image = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)


def remove_bg_fn(x):
    """Run MV-Adapter's ``remove_bg`` on *x* with the module-level BiRefNet."""
    return remove_bg(x, birefnet, transform_image, DEVICE)


# ── UniRig helper ─────────────────────────────────────────────────────────────
def _run_unirig(glb_path: str, work_dir: str) -> str:
    """Rig *glb_path* with the UniRig launch scripts and return the rigged GLB.

    Runs ``generate_skeleton.sh``, ``generate_skin.sh`` and ``merge.sh`` in
    that order, writing intermediates into *work_dir*.

    Raises:
        RuntimeError: If a script exits non-zero or no output file appears.
        subprocess.TimeoutExpired: If a script runs longer than 300 seconds.
    """
    env = os.environ.copy()
    env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
    started = time.time()

    def _launch(script, args):
        sh = os.path.join(UNIRIG_DIR, "launch", "inference", script)
        r = subprocess.run(
            ["bash", sh] + args,
            cwd=UNIRIG_DIR,
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        if r.returncode != 0:
            raise RuntimeError(f"UniRig {script} failed:\n{r.stderr[-1000:]}")

    skel = os.path.join(work_dir, "skeleton.fbx")
    skin = os.path.join(work_dir, "skin.fbx")
    out = os.path.join(work_dir, "rigged.glb")

    _launch("generate_skeleton.sh", ["--input", glb_path, "--output", skel])
    _launch("generate_skin.sh", ["--input", skel, "--output", skin])
    _launch("merge.sh", ["--source", skin, "--target", glb_path, "--output", out])

    if os.path.exists(out):
        return out

    # The fallback path is shared by every request, so only accept a file that
    # was written during this run; an older one belongs to some other job.
    fallback = UNIRIG_FALLBACK_OUTPUT
    if os.path.exists(fallback) and os.path.getmtime(fallback) >= started:
        shutil.copy2(fallback, out)
        return out
    raise RuntimeError("UniRig produced no output")


# ── Pipeline stages ───────────────────────────────────────────────────────────
def _build_shape(image_path, seed, save_dir, hex_id):
    """Stage 1: segment the portrait, run TripoSG and export the mesh as GLB."""
    image_seg = prepare_image(
        image_path, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net
    )
    outputs = triposg_pipe(
        image=image_seg,
        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
        num_inference_steps=50,
        guidance_scale=7.5,
    ).samples[0]
    mesh = trimesh.Trimesh(
        outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1])
    )

    # Imported here, as in the original code, rather than at module level.
    from utils import simplify_mesh

    mesh = simplify_mesh(mesh, 100000)

    shape_path = os.path.join(save_dir, f"shape_{hex_id}.glb")
    mesh.export(shape_path)
    return shape_path


def _build_texture(image_path, seed, shape_path, save_dir, hex_id):
    """Stage 2: generate six views with MV-Adapter and bake them onto the mesh."""
    height = width = 768
    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=CAMERA_AZIMUTH_DEG,
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
    mesh_loaded = load_mesh(shape_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        mesh_loaded,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    # Position and normal renders, concatenated on the last axis and moved to
    # channels-first, form the control input of the multi-view pipeline.
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    ref_image = Image.open(image_path)
    ref_image = remove_bg_fn(ref_image)
    ref_image = preprocess_image(ref_image, height, width)

    gen = torch.Generator(device=DEVICE).manual_seed(seed)
    mv_images = mv_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=ref_image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs=None,
        generator=gen,
    ).images

    torch.cuda.empty_cache()

    mv_path = os.path.join(save_dir, f"mv_{hex_id}.png")
    make_image_grid(mv_images, rows=1).save(mv_path)

    # The compiled extension has to be on disk before `texture` is imported.
    ensure_texture_extension()
    from texture import TexturePipeline, ModProcessConfig

    tex_pipe = TexturePipeline(
        upscaler_ckpt_path=os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth"),
        inpaint_ckpt_path=os.path.join(CKPT_DIR, "big-lama.pt"),
        device=DEVICE,
    )
    return tex_pipe(
        mesh_path=shape_path,
        save_dir=save_dir,
        save_name=f"textured_{hex_id}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=CAMERA_AZIMUTH_DEG,
    )


def _transplant_pshuman_face(image_path, body_glb_path, save_dir, hex_id):
    """Stage 4: run PSHuman on the portrait and transplant the face mesh.

    Returns the path of the new avatar GLB, or *body_glb_path* unchanged when
    PSHuman produced no ``.obj`` file.

    Raises:
        RuntimeError: If the PSHuman inference script exits non-zero.
    """
    from torchvision.transforms.functional import normalize as _norm

    pshuman_work = os.path.join(save_dir, "pshuman")
    os.makedirs(pshuman_work, exist_ok=True)
    face_png = os.path.join(pshuman_work, "face.png")

    # RMBG mask for portrait
    # Open once and force RGB: the 3-channel normalisation below fails on
    # RGBA / greyscale / palette inputs, which silently disabled this stage.
    with Image.open(image_path) as src:
        rgb = src.convert("RGB")
    img_t = transforms.ToTensor()(rgb.resize((1024, 1024))).unsqueeze(0).to(DEVICE)
    img_norm = _norm(img_t, [0.5] * 3, [1.0] * 3)
    # Gradients are already disabled by the decorator on generate_avatar.
    mask = rmbg_net(img_norm)[0][0].sigmoid()[0, 0].cpu().numpy()
    mask_pil = Image.fromarray((mask * 255).astype(np.uint8)).resize(rgb.size)
    rgba = rgb.convert("RGBA")
    rgba.putalpha(mask_pil)
    rgba.save(face_png)

    pshuman_out = os.path.join(pshuman_work, "out")
    cfg = os.path.join(PSHUMAN_DIR, "configs/inference-768-6view.yaml")
    result = subprocess.run(
        [
            sys.executable,
            os.path.join(PSHUMAN_DIR, "inference.py"),
            "--config",
            cfg,
            "--data_dir",
            pshuman_work,
            "--case_name",
            "face",
            "--output_dir",
            pshuman_out,
            "--pretrained_model_name_or_path",
            PSHUMAN_CKPT,
        ],
        capture_output=True,
        text=True,
        timeout=300,
    )
    if result.returncode != 0:
        # Same policy as the UniRig launcher: surface the tail of stderr.
        raise RuntimeError(f"PSHuman inference failed:\n{result.stderr[-1000:]}")

    face_obj = None
    if os.path.exists(pshuman_out):
        # Sorted so the choice is deterministic when several .obj files exist.
        face_obj = next(
            (
                os.path.join(pshuman_out, f)
                for f in sorted(os.listdir(pshuman_out))
                if f.endswith(".obj")
            ),
            None,
        )
    if not face_obj:
        return body_glb_path

    pipeline_dir = os.path.join(ROOT, "..", "pipeline")
    if pipeline_dir not in sys.path:  # avoid growing sys.path on every request
        sys.path.insert(0, pipeline_dir)
    from face_transplant import transplant_face

    final_path = os.path.join(save_dir, f"avatar_{hex_id}.glb")
    transplant_face(
        body_glb_path=body_glb_path,
        pshuman_mesh_path=face_obj,
        output_path=final_path,
        weight_threshold=0.5,
        retract_mm=2.0,
    )
    return final_path


# ── Main single-pass function ─────────────────────────────────────────────────
@spaces.GPU(duration=600)
@torch.no_grad()
def generate_avatar(image_path: str, seed: int, req: gr.Request):
    """Run shape -> texture -> rig -> face for one portrait.

    Args:
        image_path: File path of the uploaded portrait.
        seed: Seed used for both the shape and the multi-view generators.
        req: Gradio request; its ``session_hash`` names the working directory.

    Returns:
        Path of the most complete GLB produced: the face-transplanted avatar,
        else the rigged mesh, else the textured mesh.

    Raises:
        gr.Error: If no portrait was provided.
    """
    if not image_path:
        raise gr.Error("Please upload a portrait photo first.")
    # NOTE(review): a slider value may presumably arrive as a float;
    # torch's manual_seed needs an int.
    seed = int(seed)

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(save_dir, exist_ok=True)
    hex_id = os.urandom(4).hex()

    # ── 1. Segment + shape ────────────────────────────────────────────────────
    shape_path = _build_shape(image_path, seed, save_dir, hex_id)
    torch.cuda.empty_cache()

    # ── 2. Texture ────────────────────────────────────────────────────────────
    textured_path = _build_texture(image_path, seed, shape_path, save_dir, hex_id)

    # ── 3. UniRig ─────────────────────────────────────────────────────────────
    # Deliberately best-effort: a rigging failure falls back to the textured mesh.
    try:
        rigged_path = _run_unirig(textured_path, save_dir)
    except Exception as e:
        print(f"[UniRig] skipped: {e}")
        rigged_path = textured_path

    # ── 4. PSHuman face ───────────────────────────────────────────────────────
    # Deliberately best-effort as well: keep the rigged mesh on any failure.
    final_path = rigged_path
    try:
        final_path = _transplant_pshuman_face(
            image_path, rigged_path, save_dir, hex_id
        )
    except Exception as e:
        print(f"[PSHuman] skipped: {e}")

    return final_path


# ── UI ────────────────────────────────────────────────────────────────────────
def start_session(req: gr.Request):
    """Create the per-session working directory under ``TMP_DIR``."""
    os.makedirs(os.path.join(TMP_DIR, str(req.session_hash)), exist_ok=True)


def end_session(req: gr.Request):
    """Delete the per-session working directory, if it exists."""
    d = os.path.join(TMP_DIR, str(req.session_hash))
    if os.path.exists(d):
        shutil.rmtree(d)


def get_seed(randomize, seed):
    """Return a random seed in ``[0, MAX_SEED]`` when *randomize*, else *seed*."""
    return random.randint(0, MAX_SEED) if randomize else seed


with gr.Blocks(title="MeshForge") as demo:
    gr.Markdown(
        "# 🧊 MeshForge\n"
        "### Portrait → Textured 3D Mesh → Rigged Avatar\n"
        "Upload a portrait photo and click **Generate Avatar**. "
        "The full pipeline (shape → texture → rig → face) runs in one pass."
    )

    with gr.Row():
        with gr.Column(scale=1):
            portrait = gr.Image(label="Portrait Photo", type="filepath")
            seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
            randomize = gr.Checkbox(True, label="Randomize seed")
            btn = gr.Button("Generate Avatar", variant="primary")
        with gr.Column(scale=1):
            output_3d = gr.Model3D(label="Rigged Avatar")
            output_dl = gr.File(label="Download GLB")

    # Resolve the seed first, generate, then mirror the result to the download box.
    btn.click(get_seed, inputs=[randomize, seed], outputs=[seed]).then(
        generate_avatar,
        inputs=[portrait, seed],
        outputs=[output_3d],
    ).then(lambda x: x, inputs=output_3d, outputs=output_dl)

    demo.load(start_session)
    demo.unload(end_session)

demo.launch()