# MeshForge / ZeroGPU / app.py
# Daankular's picture
# Upload folder using huggingface_hub
# 05600b2 verified
import spaces
import os
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
import random
import tempfile
import shutil
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
from huggingface_hub import hf_hub_download, snapshot_download
import subprocess
import sys
import urllib.request
# install extras
# pip is invoked through the running interpreter with an argument list (no
# shell), so the package is installed into this environment and nothing is
# re-parsed by a shell.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "spandrel==0.4.1", "--no-deps"],
    check=True,
)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16
MAX_SEED = np.iinfo(np.int32).max

# ── Repos ─────────────────────────────────────────────────────────────────────
ROOT = os.path.dirname(os.path.abspath(__file__))
TRIPOSG_DIR = os.path.join(ROOT, "triposg")
MV_DIR = os.path.join(ROOT, "mv_adapter")
UNIRIG_DIR = os.path.join(ROOT, "unirig")
PSHUMAN_DIR = os.path.join(ROOT, "pshuman")
TMP_DIR = os.path.join(ROOT, "tmp")
os.makedirs(TMP_DIR, exist_ok=True)
CKPT_DIR = os.path.join(ROOT, "checkpoints")
os.makedirs(CKPT_DIR, exist_ok=True)
TEXTURE_EXT_NAME = "texture.cpython-310-x86_64-linux-gnu.so"
TEXTURE_EXT_URL = (
    f"https://huggingface.co/spaces/VAST-AI/TripoSG/resolve/main/{TEXTURE_EXT_NAME}"
)


def _clone_repo(url, dest, commit=None, required=True):
    """Clone the git repository *url* into *dest* unless *dest* already exists.

    Args:
        url: Remote repository URL.
        dest: Local directory to clone into; an existing path is left untouched.
        commit: Optional revision to check out after cloning.
        required: When true a failure raises; otherwise it is only reported,
            because the stages using that repository are best-effort.

    Raises:
        RuntimeError: If cloning or checking out a required repository fails.
    """
    if os.path.exists(dest):
        return
    try:
        # Argument lists keep paths containing spaces or shell metacharacters
        # intact, which an interpolated shell string does not.
        subprocess.run(["git", "clone", url, dest], check=True)
        if commit is not None:
            subprocess.run(["git", "-C", dest, "checkout", commit], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        # Remove a partial or un-pinned checkout so the next start retries it
        # instead of treating the directory as a finished install.
        shutil.rmtree(dest, ignore_errors=True)
        if required:
            raise RuntimeError(f"[startup] failed to clone {url}") from err
        print(f"[startup] optional repo {url} unavailable: {err}")


_clone_repo("https://github.com/VAST-AI-Research/TripoSG.git", TRIPOSG_DIR)
_clone_repo(
    "https://github.com/huanngzh/MV-Adapter.git",
    MV_DIR,
    commit="7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d",
)
# The rigging and face stages are wrapped in try/except by generate_avatar, so
# a missing UniRig or PSHuman checkout must not stop the app from starting.
_clone_repo(
    "https://github.com/VAST-AI-Research/UniRig.git", UNIRIG_DIR, required=False
)
_clone_repo("https://github.com/pengHTYX/PSHuman.git", PSHUMAN_DIR, required=False)

# Make the cloned repositories (and their scripts/ folders) importable.
for p in [
    TRIPOSG_DIR,
    os.path.join(TRIPOSG_DIR, "scripts"),
    MV_DIR,
    os.path.join(MV_DIR, "scripts"),
]:
    if p not in sys.path:
        sys.path.insert(0, p)
def ensure_texture_extension() -> str:
    """Return the local path of the texture extension, downloading it if absent.

    The file is fetched from ``TEXTURE_EXT_URL`` into a sibling ``.part`` file
    and only renamed to ``ROOT/TEXTURE_EXT_NAME`` once the transfer has
    finished. An interrupted download therefore never leaves a truncated file
    under the final name, which the existence check below would otherwise
    accept on every later call.

    Returns:
        Absolute path of the extension file.

    Raises:
        OSError: If the download fails (``urllib.error.URLError`` is a subclass).
    """
    texture_ext_path = os.path.join(ROOT, TEXTURE_EXT_NAME)
    if os.path.exists(texture_ext_path):
        return texture_ext_path

    print(f"[startup] downloading {TEXTURE_EXT_NAME}...")
    partial_path = texture_ext_path + ".part"
    try:
        urllib.request.urlretrieve(TEXTURE_EXT_URL, partial_path)
        os.replace(partial_path, texture_ext_path)
    finally:
        # After a successful rename the partial file is gone; after a failure
        # whatever was written so far is discarded.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return texture_ext_path
# ── Models ────────────────────────────────────────────────────────────────────
RMBG_CKPT = os.path.join(CKPT_DIR, "RMBG-1.4")
TRIPOSG_CKPT = os.path.join(CKPT_DIR, "TripoSG")
PSHUMAN_CKPT = os.path.join(CKPT_DIR, "PSHuman")

snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_CKPT)
snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_CKPT)
snapshot_download("pengHTYX/PSHuman_Unclip_768_6views", local_dir=PSHUMAN_CKPT)

if not os.path.exists(os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth")):
    hf_hub_download(
        "dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir=CKPT_DIR
    )

BIG_LAMA_URL = (
    "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt"
)
BIG_LAMA_PATH = os.path.join(CKPT_DIR, "big-lama.pt")
if not os.path.exists(BIG_LAMA_PATH):
    # Download to a ".part" file and rename once complete, so an interrupted
    # transfer is never mistaken for a finished checkpoint on the next start.
    # The argument list (no shell) keeps a CKPT_DIR with spaces intact.
    _big_lama_partial = BIG_LAMA_PATH + ".part"
    try:
        subprocess.run(
            ["wget", "-q", "-O", _big_lama_partial, BIG_LAMA_URL],
            check=True,
        )
        os.replace(_big_lama_partial, BIG_LAMA_PATH)
    finally:
        if os.path.exists(_big_lama_partial):
            os.remove(_big_lama_partial)

# The imports below are deliberately placed after the checkpoint downloads.
# NOTE(review): they presumably resolve through the sys.path entries added for
# the cloned TripoSG / MV-Adapter repositories — confirm against those repos.
from image_process import prepare_image
from briarmbg import BriaRMBG

rmbg_net = BriaRMBG.from_pretrained(RMBG_CKPT).to(DEVICE).eval()

from triposg.pipelines.pipeline_triposg import TripoSGPipeline

triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_CKPT).to(DEVICE, DTYPE)

NUM_VIEWS = 6

from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
from mvadapter.utils import get_orthogonal_camera, make_image_grid
from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render

mv_pipe = prepare_pipeline(
    base_model="stabilityai/stable-diffusion-xl-base-1.0",
    vae_model="madebyollin/sdxl-vae-fp16-fix",
    unet_model=None,
    lora_model=None,
    adapter_path="huanngzh/mv-adapter",
    scheduler=None,
    num_views=NUM_VIEWS,
    device=DEVICE,
    dtype=DTYPE,
)

birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
).to(DEVICE)

# Preprocessing handed to remove_bg together with the BiRefNet model.
transform_image = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)


def remove_bg_fn(x):
    """Run ``remove_bg`` on *x* with the module-level BiRefNet model and transform."""
    return remove_bg(x, birefnet, transform_image, DEVICE)
# ── UniRig helper ─────────────────────────────────────────────────────────────
def _run_unirig(glb_path: str, work_dir: str) -> str:
    """Rig *glb_path* with UniRig's launch scripts and return the rigged GLB path.

    Runs ``generate_skeleton.sh``, ``generate_skin.sh`` and ``merge.sh`` from
    ``UNIRIG_DIR/launch/inference`` in sequence, writing ``skeleton.fbx``,
    ``skin.fbx`` and ``rigged.glb`` into *work_dir*.

    Args:
        glb_path: Path of the mesh to rig.
        work_dir: Directory that receives the intermediate and final files.

    Returns:
        Path of ``rigged.glb`` inside *work_dir*.

    Raises:
        RuntimeError: If a script exits non-zero or no output file is produced.
        subprocess.TimeoutExpired: If a script runs longer than 300 seconds.
    """
    env = os.environ.copy()
    env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    def _launch(script, args):
        """Run one UniRig launch script, raising with the stderr tail on failure."""
        sh = os.path.join(UNIRIG_DIR, "launch", "inference", script)
        r = subprocess.run(
            ["bash", sh] + args,
            cwd=UNIRIG_DIR,
            capture_output=True,
            text=True,
            timeout=300,
            env=env,
        )
        if r.returncode != 0:
            raise RuntimeError(f"UniRig {script} failed:\n{r.stderr[-1000:]}")

    skel = os.path.join(work_dir, "skeleton.fbx")
    skin = os.path.join(work_dir, "skin.fbx")
    out = os.path.join(work_dir, "rigged.glb")

    # The file names are fixed, so artefacts left by an earlier run in the same
    # work_dir would otherwise be picked up as this run's results whenever a
    # script exits 0 without writing anything.
    for stale in (skel, skin, out):
        if os.path.exists(stale):
            os.remove(stale)

    _launch("generate_skeleton.sh", ["--input", glb_path, "--output", skel])
    _launch("generate_skin.sh", ["--input", skel, "--output", skin])
    _launch("merge.sh", ["--source", skin, "--target", glb_path, "--output", out])

    if not os.path.exists(out):
        # NOTE(review): this fallback is a fixed, process-wide path; presumably
        # a location the merge script sometimes writes to — confirm, since a
        # file left there by another run would be copied as-is.
        fallback = "/tmp/rig_out/rigged.glb"
        if os.path.exists(fallback):
            shutil.copy2(fallback, out)
        else:
            raise RuntimeError("UniRig produced no output")
    return out
# ── Main single-pass function ─────────────────────────────────────────────────
def _generate_shape(image_path, seed, save_dir, hex_id):
    """Segment the portrait, run TripoSG and export the simplified mesh as GLB.

    Returns the path of ``shape_<hex_id>.glb`` inside *save_dir*.
    """
    image_seg = prepare_image(
        image_path, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net
    )
    outputs = triposg_pipe(
        image=image_seg,
        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
        num_inference_steps=50,
        guidance_scale=7.5,
    ).samples[0]
    mesh = trimesh.Trimesh(
        outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1])
    )

    # Imported lazily, as in the original code path.
    from utils import simplify_mesh

    mesh = simplify_mesh(mesh, 100000)
    shape_path = os.path.join(save_dir, f"shape_{hex_id}.glb")
    mesh.export(shape_path)
    torch.cuda.empty_cache()
    return shape_path


def _generate_texture(image_path, seed, shape_path, save_dir, hex_id):
    """Generate six views with MV-Adapter and bake them onto the mesh.

    Returns whatever ``TexturePipeline`` returns for the textured mesh (used by
    the caller as the textured GLB path).
    """
    height = width = 768
    # One definition of the view azimuths, shared by the control-image cameras
    # and the texture projection so the two cannot drift apart.
    view_azimuths_deg = [x - 90 for x in [0, 90, 180, 270, 180, 180]]

    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=list(view_azimuths_deg),
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
    mesh_loaded = load_mesh(shape_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        mesh_loaded,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    # Position and normal maps are concatenated on the last axis, then moved
    # to channels-first for the pipeline.
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    # Force three channels: the background-removal transform normalises with
    # 3-element mean/std, so an RGBA, grayscale or palette upload would
    # otherwise fail. convert() returns an independent copy, so the source file
    # can be closed immediately.
    with Image.open(image_path) as src:
        ref_image = src.convert("RGB")
    ref_image = remove_bg_fn(ref_image)
    ref_image = preprocess_image(ref_image, height, width)

    gen = torch.Generator(device=DEVICE).manual_seed(seed)
    mv_images = mv_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=ref_image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs=None,
        generator=gen,
    ).images
    torch.cuda.empty_cache()

    mv_path = os.path.join(save_dir, f"mv_{hex_id}.png")
    make_image_grid(mv_images, rows=1).save(mv_path)

    # The extension is fetched before importing ``texture``.
    # NOTE(review): presumably that module loads the downloaded .so — confirm.
    ensure_texture_extension()
    from texture import TexturePipeline, ModProcessConfig

    tex_pipe = TexturePipeline(
        upscaler_ckpt_path=os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth"),
        inpaint_ckpt_path=os.path.join(CKPT_DIR, "big-lama.pt"),
        device=DEVICE,
    )
    return tex_pipe(
        mesh_path=shape_path,
        save_dir=save_dir,
        save_name=f"textured_{hex_id}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=list(view_azimuths_deg),
    )


def _attach_pshuman_face(image_path, rigged_path, save_dir, hex_id):
    """Best-effort PSHuman face transplant onto the rigged body.

    Returns the path of ``avatar_<hex_id>.glb`` when the transplant succeeds,
    and *rigged_path* unchanged in every other case (no PSHuman mesh produced,
    or any error along the way).
    """
    try:
        from torchvision.transforms.functional import normalize as _norm

        pshuman_work = os.path.join(save_dir, "pshuman")
        os.makedirs(pshuman_work, exist_ok=True)
        face_png = os.path.join(pshuman_work, "face.png")

        # RMBG mask for portrait. The image is opened once and forced to three
        # channels so the 3-element normalisation below also works for RGBA or
        # grayscale uploads.
        with Image.open(image_path) as src:
            portrait_rgb = src.convert("RGB")
        img_t = (
            transforms.ToTensor()(portrait_rgb.resize((1024, 1024)))
            .unsqueeze(0)
            .to(DEVICE)
        )
        img_norm = _norm(img_t, [0.5] * 3, [1.0] * 3)
        # NOTE(review): sigmoid is applied to the network's first output here;
        # confirm that BriaRMBG does not already return probabilities.
        mask = rmbg_net(img_norm)[0][0].sigmoid()[0, 0].cpu().numpy()
        mask_pil = Image.fromarray((mask * 255).astype(np.uint8)).resize(
            portrait_rgb.size
        )
        rgba = portrait_rgb.convert("RGBA")
        rgba.putalpha(mask_pil)
        rgba.save(face_png)

        pshuman_out = os.path.join(pshuman_work, "out")
        cfg = os.path.join(PSHUMAN_DIR, "configs/inference-768-6view.yaml")
        result = subprocess.run(
            [
                sys.executable,
                os.path.join(PSHUMAN_DIR, "inference.py"),
                "--config",
                cfg,
                "--data_dir",
                pshuman_work,
                "--case_name",
                "face",
                "--output_dir",
                pshuman_out,
                "--pretrained_model_name_or_path",
                PSHUMAN_CKPT,
            ],
            capture_output=True,
            text=True,
            timeout=300,
        )
        if result.returncode != 0:
            # Surface the failure instead of discarding the captured stderr;
            # the output directory is still inspected below.
            print(
                f"[PSHuman] inference exited with code {result.returncode}:\n"
                f"{result.stderr[-1000:]}"
            )

        face_obj = None
        if os.path.exists(pshuman_out):
            # Sorted so the choice is deterministic if several .obj files exist.
            face_obj = next(
                (
                    os.path.join(pshuman_out, f)
                    for f in sorted(os.listdir(pshuman_out))
                    if f.endswith(".obj")
                ),
                None,
            )
        if not face_obj:
            return rigged_path

        pipeline_dir = os.path.join(ROOT, "..", "pipeline")
        # Guarded so repeated calls do not keep growing sys.path.
        if pipeline_dir not in sys.path:
            sys.path.insert(0, pipeline_dir)
        from face_transplant import transplant_face

        avatar_path = os.path.join(save_dir, f"avatar_{hex_id}.glb")
        transplant_face(
            body_glb_path=rigged_path,
            pshuman_mesh_path=face_obj,
            output_path=avatar_path,
            weight_threshold=0.5,
            retract_mm=2.0,
        )
        # Only report the avatar path once the transplant has completed, so a
        # failure inside transplant_face never yields a missing file.
        return avatar_path
    except Exception as e:
        # Deliberate best-effort stage: fall back to the rigged body.
        print(f"[PSHuman] skipped: {e}")
        return rigged_path


@spaces.GPU(duration=600)
@torch.no_grad()
def generate_avatar(image_path: str, seed: int, req: gr.Request):
    """Run the full portrait → shape → texture → rig → face pipeline.

    Args:
        image_path: File path of the uploaded portrait.
        seed: Seed for the TripoSG and MV-Adapter generators.
        req: Gradio request; its ``session_hash`` selects the per-session
            output directory under ``TMP_DIR``.

    Returns:
        Path of the final GLB: the face-transplanted avatar when every stage
        succeeds, otherwise the rigged mesh, otherwise the textured mesh.

    Raises:
        gr.Error: If no portrait was provided.
    """
    if not image_path:
        raise gr.Error("Please upload a portrait photo first.")
    # torch.Generator.manual_seed needs an int; coerce in case the UI delivers
    # the slider value as a float.
    seed = int(seed)

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(save_dir, exist_ok=True)
    hex_id = os.urandom(4).hex()

    # ── 1. Segment + shape ────────────────────────────────────────────────────
    shape_path = _generate_shape(image_path, seed, save_dir, hex_id)

    # ── 2. Texture ────────────────────────────────────────────────────────────
    textured_path = _generate_texture(image_path, seed, shape_path, save_dir, hex_id)

    # ── 3. UniRig ─────────────────────────────────────────────────────────────
    try:
        rigged_path = _run_unirig(textured_path, save_dir)
    except Exception as e:
        # Rigging is best-effort: continue with the un-rigged textured mesh.
        print(f"[UniRig] skipped: {e}")
        rigged_path = textured_path

    # ── 4. PSHuman face ───────────────────────────────────────────────────────
    return _attach_pshuman_face(image_path, rigged_path, save_dir, hex_id)
# ── UI ────────────────────────────────────────────────────────────────────────
def start_session(req: gr.Request):
    """Create the per-session scratch directory under ``TMP_DIR`` if needed."""
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(session_dir, exist_ok=True)
def end_session(req: gr.Request):
    """Delete the per-session scratch directory under ``TMP_DIR`` if it exists."""
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    if not os.path.exists(session_dir):
        return
    shutil.rmtree(session_dir)
def get_seed(randomize, seed):
    """Return a random seed in ``[0, MAX_SEED]`` when *randomize* is truthy, else *seed*."""
    if randomize:
        return random.randint(0, MAX_SEED)
    return seed
# Gradio UI: portrait and seed controls on the left, 3D preview and download
# on the right.
with gr.Blocks(title="MeshForge") as demo:
    # The arrows in the heading and description were mis-encoded ("β†’") in the
    # original text; they are written as real "→" characters here.
    gr.Markdown(
        "# 🧊 MeshForge\n"
        "### Portrait → Textured 3D Mesh → Rigged Avatar\n"
        "Upload a portrait photo and click **Generate Avatar**. "
        "The full pipeline (shape → texture → rig → face) runs in one pass."
    )
    with gr.Row():
        with gr.Column(scale=1):
            portrait = gr.Image(label="Portrait Photo", type="filepath")
            seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
            randomize = gr.Checkbox(True, label="Randomize seed")
            btn = gr.Button("Generate Avatar", variant="primary")
        with gr.Column(scale=1):
            output_3d = gr.Model3D(label="Rigged Avatar")
            output_dl = gr.File(label="Download GLB")

    # Resolve the seed first, then run the pipeline, then pass the resulting
    # file on to the download component.
    btn.click(get_seed, inputs=[randomize, seed], outputs=[seed]).then(
        generate_avatar,
        inputs=[portrait, seed],
        outputs=[output_3d],
    ).then(lambda x: x, inputs=output_3d, outputs=output_dl)

    # Create the per-session directory on page load and remove it on unload.
    demo.load(start_session)
    demo.unload(end_session)

demo.launch()