# Source: Hugging Face Space "Daankular/MeshForge"
# Space status when captured: Runtime error
# File size: 14,606 Bytes

import spaces
import os
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
import random
import tempfile
import shutil
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
from huggingface_hub import hf_hub_download, snapshot_download
import subprocess
import sys
import urllib.request

# install extras
# Run pip through the interpreter executing this script so the package is
# installed into the environment this process imports from; passing an
# argument list avoids going through a shell.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "spandrel==0.4.1", "--no-deps"],
    check=True,
)

# Device every model in this file is moved to.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# dtype handed to the TripoSG and MV-Adapter pipelines created further down.
DTYPE = torch.float16
# Upper bound of the seed slider / random seed (largest signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max

# ── Repos ─────────────────────────────────────────────────────────────────────
ROOT = os.path.dirname(os.path.abspath(__file__))

TRIPOSG_DIR = os.path.join(ROOT, "triposg")
MV_DIR = os.path.join(ROOT, "mv_adapter")
UNIRIG_DIR = os.path.join(ROOT, "unirig")
PSHUMAN_DIR = os.path.join(ROOT, "pshuman")
TMP_DIR = os.path.join(ROOT, "tmp")
os.makedirs(TMP_DIR, exist_ok=True)
CKPT_DIR = os.path.join(ROOT, "checkpoints")
os.makedirs(CKPT_DIR, exist_ok=True)

TEXTURE_EXT_NAME = "texture.cpython-310-x86_64-linux-gnu.so"
TEXTURE_EXT_URL = (
    f"https://huggingface.co/spaces/VAST-AI/TripoSG/resolve/main/{TEXTURE_EXT_NAME}"
)


def _clone_repo(url, dest, commit=None, required=True):
    """Clone the git repository *url* into *dest* unless *dest* already exists.

    Args:
        url: Repository URL passed to ``git clone``.
        dest: Target directory; when it already exists nothing is done.
        commit: Optional revision checked out right after cloning.
        required: When true a failure raises; otherwise it is only logged so
            the optional pipeline stage depending on the repo can be skipped.

    Raises:
        RuntimeError: if a required repository cannot be cloned or checked out.
    """
    if os.path.exists(dest):
        return
    try:
        # Argument lists keep paths containing spaces intact and avoid a shell.
        subprocess.run(["git", "clone", url, dest], check=True)
        if commit is not None:
            subprocess.run(["git", "-C", dest, "checkout", commit], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        # Remove a partial checkout so the existence test above does not
        # mistake it for a finished clone on the next start.
        shutil.rmtree(dest, ignore_errors=True)
        if required:
            raise RuntimeError(f"[startup] failed to clone {url}") from err
        print(f"[startup] optional repo {url} unavailable: {err}")


# TripoSG and MV-Adapter are imported at module level further down, so the
# app cannot start without them.
_clone_repo("https://github.com/VAST-AI-Research/TripoSG.git", TRIPOSG_DIR)
_clone_repo(
    "https://github.com/huanngzh/MV-Adapter.git",
    MV_DIR,
    commit="7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d",
)
# UniRig and PSHuman are only used by stages that are skipped on failure.
_clone_repo(
    "https://github.com/VAST-AI-Research/UniRig.git", UNIRIG_DIR, required=False
)
_clone_repo("https://github.com/pengHTYX/PSHuman.git", PSHUMAN_DIR, required=False)

# Make the cloned packages and their script folders importable.
for p in [
    TRIPOSG_DIR,
    os.path.join(TRIPOSG_DIR, "scripts"),
    MV_DIR,
    os.path.join(MV_DIR, "scripts"),
]:
    if p not in sys.path:
        sys.path.insert(0, p)


def ensure_texture_extension() -> str:
    """Ensure the compiled texture extension exists inside ``ROOT``.

    When the file is missing it is fetched from ``TEXTURE_EXT_URL``. The
    download goes to a ``.part`` file that is renamed into place only after it
    completed, so an interrupted download can never be mistaken for a valid
    extension on a later call.

    Returns:
        Path of the extension file (``ROOT`` joined with ``TEXTURE_EXT_NAME``).

    Raises:
        OSError: if the download or the final rename fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    texture_ext_path = os.path.join(ROOT, TEXTURE_EXT_NAME)
    if os.path.exists(texture_ext_path):
        return texture_ext_path
    print(f"[startup] downloading {TEXTURE_EXT_NAME}...")
    partial_path = texture_ext_path + ".part"
    try:
        urllib.request.urlretrieve(TEXTURE_EXT_URL, partial_path)
        os.replace(partial_path, texture_ext_path)
    finally:
        # After a successful rename the partial file is gone; after a failure
        # this removes whatever was written so far.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return texture_ext_path


# ── Models ────────────────────────────────────────────────────────────────────
# Local directories the Hugging Face snapshots are materialised into.
RMBG_CKPT = os.path.join(CKPT_DIR, "RMBG-1.4")
TRIPOSG_CKPT = os.path.join(CKPT_DIR, "TripoSG")
PSHUMAN_CKPT = os.path.join(CKPT_DIR, "PSHuman")

snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_CKPT)
snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_CKPT)
snapshot_download("pengHTYX/PSHuman_Unclip_768_6views", local_dir=PSHUMAN_CKPT)

# Single-file checkpoints handed to the texture pipeline (upscaler + inpainter).
if not os.path.exists(os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth")):
    hf_hub_download(
        "dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir=CKPT_DIR
    )

if not os.path.exists(os.path.join(CKPT_DIR, "big-lama.pt")):
    # Argument list instead of a shell string: the checkpoint directory is
    # passed verbatim even when its path contains spaces or shell characters.
    subprocess.run(
        [
            "wget",
            "-q",
            "-P",
            CKPT_DIR,
            "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt",
        ],
        check=True,
    )

# These modules are resolved through the repo directories that were put on
# sys.path earlier in this file, which is why they are imported this late.
from image_process import prepare_image
from briarmbg import BriaRMBG

# Background-removal network used for segmentation and for the face mask.
rmbg_net = BriaRMBG.from_pretrained(RMBG_CKPT)
rmbg_net = rmbg_net.to(DEVICE).eval()

from triposg.pipelines.pipeline_triposg import TripoSGPipeline

# Image-to-shape pipeline.
triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_CKPT)
triposg_pipe = triposg_pipe.to(DEVICE, DTYPE)

NUM_VIEWS = 6
from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
from mvadapter.utils import get_orthogonal_camera, make_image_grid
from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render

# Multi-view image pipeline used for texturing.
mv_pipe = prepare_pipeline(
    base_model="stabilityai/stable-diffusion-xl-base-1.0",
    vae_model="madebyollin/sdxl-vae-fp16-fix",
    adapter_path="huanngzh/mv-adapter",
    unet_model=None,
    lora_model=None,
    scheduler=None,
    num_views=NUM_VIEWS,
    device=DEVICE,
    dtype=DTYPE,
)

# Segmentation model and preprocessing passed to ``remove_bg``.
birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
)
birefnet = birefnet.to(DEVICE)
transform_image = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)


def remove_bg_fn(x):
    """Call ``remove_bg`` on *x* with the module-level BiRefNet model and transform."""
    return remove_bg(x, birefnet, transform_image, DEVICE)


# ── UniRig helper ─────────────────────────────────────────────────────────────
def _run_unirig(glb_path: str, work_dir: str) -> str:
    env = os.environ.copy()
    env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    def _launch(script, args):
        sh = os.path.join(UNIRIG_DIR, "launch", "inference", script)
        r = subprocess.run(
            ["bash", sh] + args,
            cwd=UNIRIG_DIR,
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        if r.returncode != 0:
            raise RuntimeError(f"UniRig {script} failed:\n{r.stderr[-1000:]}")

    skel = os.path.join(work_dir, "skeleton.fbx")
    skin = os.path.join(work_dir, "skin.fbx")
    out = os.path.join(work_dir, "rigged.glb")

    _launch("generate_skeleton.sh", ["--input", glb_path, "--output", skel])
    _launch("generate_skin.sh", ["--input", skel, "--output", skin])
    _launch("merge.sh", ["--source", skin, "--target", glb_path, "--output", out])

    if not os.path.exists(out):
        fallback = "/tmp/rig_out/rigged.glb"
        if os.path.exists(fallback):
            shutil.copy2(fallback, out)
        else:
            raise RuntimeError("UniRig produced no output")
    return out


# ── Main single-pass function ─────────────────────────────────────────────────
def _generate_shape(image_path: str, seed: int, save_dir: str, hex_id: str) -> str:
    """Segment the portrait, run the TripoSG pipeline and export the mesh as GLB."""
    image_seg = prepare_image(
        image_path, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net
    )
    outputs = triposg_pipe(
        image=image_seg,
        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
        num_inference_steps=50,
        guidance_scale=7.5,
    ).samples[0]

    mesh = trimesh.Trimesh(
        outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1])
    )
    from utils import simplify_mesh

    mesh = simplify_mesh(mesh, 100000)

    shape_path = os.path.join(save_dir, f"shape_{hex_id}.glb")
    mesh.export(shape_path)
    return shape_path


def _generate_texture(
    image_path: str, shape_path: str, seed: int, save_dir: str, hex_id: str
) -> str:
    """Render control maps for the mesh, generate multi-view images and bake a texture."""
    height = width = 768
    # One azimuth per view. The same values drive both the rendering cameras
    # and the texture baking, so they are defined once.
    azimuth_deg = [x - 90 for x in [0, 90, 180, 270, 180, 180]]
    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=list(azimuth_deg),
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
    mesh_loaded = load_mesh(shape_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        mesh_loaded,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    # Position and normal maps, each shifted into [0, 1], are concatenated on
    # the last axis and then permuted so that axis comes second.
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    ref_image = Image.open(image_path)
    ref_image = remove_bg_fn(ref_image)
    ref_image = preprocess_image(ref_image, height, width)

    gen = torch.Generator(device=DEVICE).manual_seed(seed)
    mv_images = mv_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=ref_image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs=None,
        generator=gen,
    ).images
    torch.cuda.empty_cache()

    mv_path = os.path.join(save_dir, f"mv_{hex_id}.png")
    make_image_grid(mv_images, rows=1).save(mv_path)

    # The extension file must be present before ``texture`` is imported.
    ensure_texture_extension()
    from texture import TexturePipeline, ModProcessConfig

    tex_pipe = TexturePipeline(
        upscaler_ckpt_path=os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth"),
        inpaint_ckpt_path=os.path.join(CKPT_DIR, "big-lama.pt"),
        device=DEVICE,
    )
    return tex_pipe(
        mesh_path=shape_path,
        save_dir=save_dir,
        save_name=f"textured_{hex_id}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=list(azimuth_deg),
    )


def _apply_pshuman_face(
    image_path: str, body_glb_path: str, save_dir: str, hex_id: str
):
    """Run PSHuman on the portrait and transplant the resulting face mesh.

    Returns the path of the new avatar GLB, or ``None`` when PSHuman produced
    no ``.obj`` file. Exceptions propagate to the caller.
    """
    from torchvision.transforms.functional import normalize as _norm

    pshuman_work = os.path.join(save_dir, "pshuman")
    os.makedirs(pshuman_work, exist_ok=True)
    face_png = os.path.join(pshuman_work, "face.png")

    # Open the portrait once. The network input is forced to RGB because the
    # 3-value normalisation below fails on RGBA or grayscale uploads.
    with Image.open(image_path) as src:
        rgb = src.convert("RGB")
        rgba = src.convert("RGBA")

    # RMBG mask for portrait
    img_t = transforms.ToTensor()(rgb.resize((1024, 1024))).unsqueeze(0).to(DEVICE)
    img_norm = _norm(img_t, [0.5] * 3, [1.0] * 3)
    with torch.no_grad():
        mask = rmbg_net(img_norm)[0][0].sigmoid()[0, 0].cpu().numpy()
    mask_pil = Image.fromarray((mask * 255).astype(np.uint8)).resize(rgb.size)
    rgba.putalpha(mask_pil)
    rgba.save(face_png)

    pshuman_out = os.path.join(pshuman_work, "out")
    # The work directory is shared by all runs of a session; clear old output
    # so a mesh from a previous portrait can never be picked up below.
    shutil.rmtree(pshuman_out, ignore_errors=True)
    cfg = os.path.join(PSHUMAN_DIR, "configs/inference-768-6view.yaml")
    result = subprocess.run(
        [
            sys.executable,
            os.path.join(PSHUMAN_DIR, "inference.py"),
            "--config",
            cfg,
            "--data_dir",
            pshuman_work,
            "--case_name",
            "face",
            "--output_dir",
            pshuman_out,
            "--pretrained_model_name_or_path",
            PSHUMAN_CKPT,
        ],
        capture_output=True,
        text=True,
        timeout=300,
    )
    if result.returncode != 0:
        print(
            f"[PSHuman] inference exited with code {result.returncode}:\n"
            f"{result.stderr[-1000:]}"
        )

    face_obj = None
    if os.path.exists(pshuman_out):
        face_obj = next(
            (
                os.path.join(pshuman_out, f)
                for f in os.listdir(pshuman_out)
                if f.endswith(".obj")
            ),
            None,
        )
    if not face_obj:
        return None

    pipeline_dir = os.path.join(ROOT, "..", "pipeline")
    if pipeline_dir not in sys.path:
        # Insert once; repeated inserts would grow sys.path on every request.
        sys.path.insert(0, pipeline_dir)
    from face_transplant import transplant_face

    avatar_path = os.path.join(save_dir, f"avatar_{hex_id}.glb")
    transplant_face(
        body_glb_path=body_glb_path,
        pshuman_mesh_path=face_obj,
        output_path=avatar_path,
        weight_threshold=0.5,
        retract_mm=2.0,
    )
    return avatar_path


@spaces.GPU(duration=600)
@torch.no_grad()
def generate_avatar(image_path: str, seed: int, req: gr.Request):
    """Run the full pipeline (shape, texture, rig, face) on one portrait.

    Args:
        image_path: File path of the uploaded portrait.
        seed: Seed for the shape and multi-view generators.
        req: Gradio request; its ``session_hash`` selects the output directory
            under ``TMP_DIR``.

    Returns:
        Path of the final GLB. The rigging and face stages are best-effort:
        when one fails, the result of the previous stage is returned instead.

    Raises:
        gr.Error: if no portrait was provided.
    """
    if not image_path:
        raise gr.Error("Please upload a portrait photo first.")
    # ``manual_seed`` needs an int; UI widgets may deliver a float.
    seed = int(seed)

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(save_dir, exist_ok=True)
    hex_id = os.urandom(4).hex()

    # ── 1. Segment + shape ────────────────────────────────────────────────────
    shape_path = _generate_shape(image_path, seed, save_dir, hex_id)
    torch.cuda.empty_cache()

    # ── 2. Texture ────────────────────────────────────────────────────────────
    textured_path = _generate_texture(image_path, shape_path, seed, save_dir, hex_id)

    # ── 3. UniRig ─────────────────────────────────────────────────────────────
    try:
        rigged_path = _run_unirig(textured_path, save_dir)
    except Exception as e:
        print(f"[UniRig] skipped: {e}")
        rigged_path = textured_path

    # ── 4. PSHuman face ───────────────────────────────────────────────────────
    # ``final_path`` only changes after the transplant succeeded, so a failure
    # never leaves it pointing at a file that was not written.
    final_path = rigged_path
    try:
        avatar_path = _apply_pshuman_face(image_path, rigged_path, save_dir, hex_id)
        if avatar_path is not None:
            final_path = avatar_path
    except Exception as e:
        print(f"[PSHuman] skipped: {e}")

    return final_path


# ── UI ────────────────────────────────────────────────────────────────────────
def start_session(req: gr.Request):
    """Create the scratch directory for this session under ``TMP_DIR``."""
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(session_dir, exist_ok=True)


def end_session(req: gr.Request):
    """Delete this session's scratch directory under ``TMP_DIR`` if it exists."""
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    if not os.path.exists(session_dir):
        return
    shutil.rmtree(session_dir)


def get_seed(randomize, seed):
    """Return *seed* unchanged, or a random value in ``[0, MAX_SEED]`` when *randomize* is truthy."""
    if not randomize:
        return seed
    return random.randint(0, MAX_SEED)


with gr.Blocks(title="MeshForge") as demo:
    # Page header.
    gr.Markdown(
        "# 🧊 MeshForge\n"
        "### Portrait → Textured 3D Mesh → Rigged Avatar\n"
        "Upload a portrait photo and click **Generate Avatar**. "
        "The full pipeline (shape → texture → rig → face) runs in one pass."
    )
    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            portrait = gr.Image(label="Portrait Photo", type="filepath")
            seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
            randomize = gr.Checkbox(True, label="Randomize seed")
            btn = gr.Button("Generate Avatar", variant="primary")
        # Right column: 3D preview and download.
        with gr.Column(scale=1):
            output_3d = gr.Model3D(label="Rigged Avatar")
            output_dl = gr.File(label="Download GLB")

    # Event chain: choose the seed, generate the avatar, then pass the
    # viewer's value on to the download component.
    seed_chosen = btn.click(get_seed, inputs=[randomize, seed], outputs=[seed])
    avatar_ready = seed_chosen.then(
        generate_avatar,
        inputs=[portrait, seed],
        outputs=[output_3d],
    )
    avatar_ready.then(lambda x: x, inputs=output_3d, outputs=output_dl)

    # Session scratch directories are created on load and removed on unload.
    demo.load(start_session)
    demo.unload(end_session)

demo.launch()