Spaces:

Daankular
/

MeshForge

Runtime error

App Files Files Community

MeshForge / ZeroGPU /app.py

Daankular

Upload folder using huggingface_hub

05600b2 verified 12 days ago

raw

history blame contribute delete

14.6 kB

	import spaces
	import os
	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image
	import trimesh
	import random
	import tempfile
	import shutil
	from transformers import AutoModelForImageSegmentation
	from torchvision import transforms
	from huggingface_hub import hf_hub_download, snapshot_download
	import subprocess
	import sys
	import urllib.request

	# Install extras at startup. pip is invoked as a module of the interpreter
	# that is running this script, so the package is installed into the same
	# environment the imports below resolve against; the argument list avoids
	# routing the command through a shell.
	subprocess.run(
	    [sys.executable, "-m", "pip", "install", "spandrel==0.4.1", "--no-deps"],
	    check=True,
	)

	# Use the GPU when torch can see one, otherwise fall back to the CPU.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	# Half precision; passed to the TripoSG and MV-Adapter pipelines.
	DTYPE = torch.float16
	# Upper bound of the seed slider: the largest signed 32-bit integer.
	MAX_SEED = np.iinfo(np.int32).max

	# ── Repos ─────────────────────────────────────────────────────────────────────
	# Every path below is anchored at the directory that contains this script.
	ROOT = os.path.dirname(os.path.abspath(__file__))

	# Local checkout locations of the third-party repositories.
	TRIPOSG_DIR = os.path.join(ROOT, "triposg")
	MV_DIR = os.path.join(ROOT, "mv_adapter")
	UNIRIG_DIR = os.path.join(ROOT, "unirig")
	PSHUMAN_DIR = os.path.join(ROOT, "pshuman")

	# Working directories: TMP_DIR holds one sub-directory per UI session,
	# CKPT_DIR holds the downloaded model weights. Both are created eagerly.
	TMP_DIR = os.path.join(ROOT, "tmp")
	CKPT_DIR = os.path.join(ROOT, "checkpoints")
	os.makedirs(TMP_DIR, exist_ok=True)
	os.makedirs(CKPT_DIR, exist_ok=True)

	# Prebuilt texture extension module and the Space it is fetched from.
	TEXTURE_EXT_NAME = "texture.cpython-310-x86_64-linux-gnu.so"
	TEXTURE_EXT_URL = (
	    f"https://huggingface.co/spaces/VAST-AI/TripoSG/resolve/main/{TEXTURE_EXT_NAME}"
	)

	def _clone_repo(url, dest, rev=None, required=True):
	    """Clone ``url`` into ``dest`` unless ``dest`` already exists.

	    Args:
	        url: Git URL of the repository.
	        dest: Target directory of the checkout.
	        rev: Optional commit to check out after cloning.
	        required: When true a failure aborts startup with ``RuntimeError``;
	            otherwise the failure is only logged.

	    Raises:
	        RuntimeError: if a required repository cannot be cloned or pinned.
	    """
	    if os.path.exists(dest):
	        return
	    try:
	        # Argument lists keep paths containing spaces intact and let the
	        # exit status of every git command be checked.
	        subprocess.run(["git", "clone", url, dest], check=True)
	        if rev is not None:
	            subprocess.run(["git", "checkout", rev], cwd=dest, check=True)
	    except (OSError, subprocess.CalledProcessError) as err:
	        # Remove a partial or unpinned checkout so the next startup retries
	        # instead of silently accepting it because the directory exists.
	        shutil.rmtree(dest, ignore_errors=True)
	        if required:
	            raise RuntimeError(f"[startup] could not clone {url}") from err
	        print(f"[startup] optional repository {url} unavailable: {err}")


	# TripoSG and MV-Adapter are imported at module level further down, so a
	# failed clone is fatal for them.
	_clone_repo("https://github.com/VAST-AI-Research/TripoSG.git", TRIPOSG_DIR)
	_clone_repo(
	    "https://github.com/huanngzh/MV-Adapter.git",
	    MV_DIR,
	    rev="7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d",
	)

	# UniRig and PSHuman are only used by stages that are skipped on error, so
	# the app can still start without them.
	_clone_repo(
	    "https://github.com/VAST-AI-Research/UniRig.git", UNIRIG_DIR, required=False
	)
	_clone_repo("https://github.com/pengHTYX/PSHuman.git", PSHUMAN_DIR, required=False)

	# Directories that must be importable: the TripoSG and MV-Adapter checkouts
	# and their ``scripts`` sub-directories.
	_EXTRA_IMPORT_DIRS = (
	    TRIPOSG_DIR,
	    os.path.join(TRIPOSG_DIR, "scripts"),
	    MV_DIR,
	    os.path.join(MV_DIR, "scripts"),
	)
	# Prepend every directory that is not on sys.path yet. The tuple is walked
	# in reverse so that, as with one-by-one insertion at index 0, later entries
	# end up ahead of earlier ones.
	sys.path[:0] = [d for d in reversed(_EXTRA_IMPORT_DIRS) if d not in sys.path]


	def ensure_texture_extension() -> str:
	    """Make sure the prebuilt texture extension exists next to this script.

	    The file is downloaded from ``TEXTURE_EXT_URL`` on first use. The download
	    goes to a temporary name and is renamed into place only once complete, so
	    an interrupted transfer can never leave a truncated module that later
	    calls would mistake for a valid one.

	    Returns:
	        Path of the extension module inside ``ROOT``.

	    Raises:
	        Whatever ``urllib.request.urlretrieve`` raises when the download fails.
	    """
	    texture_ext_path = os.path.join(ROOT, TEXTURE_EXT_NAME)
	    if os.path.exists(texture_ext_path):
	        return texture_ext_path

	    print(f"[startup] downloading {TEXTURE_EXT_NAME}...")
	    # The process id keeps concurrent workers from sharing a temporary file.
	    partial_path = f"{texture_ext_path}.{os.getpid()}.part"
	    try:
	        urllib.request.urlretrieve(TEXTURE_EXT_URL, partial_path)
	        os.replace(partial_path, texture_ext_path)
	    finally:
	        # Only still present when the download or the rename failed.
	        if os.path.exists(partial_path):
	            os.remove(partial_path)
	    return texture_ext_path


	# ── Models ────────────────────────────────────────────────────────────────────
	# Local snapshot directories of the Hugging Face model repositories.
	RMBG_CKPT = os.path.join(CKPT_DIR, "RMBG-1.4")
	TRIPOSG_CKPT = os.path.join(CKPT_DIR, "TripoSG")
	PSHUMAN_CKPT = os.path.join(CKPT_DIR, "PSHuman")

	snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_CKPT)
	snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_CKPT)
	snapshot_download("pengHTYX/PSHuman_Unclip_768_6views", local_dir=PSHUMAN_CKPT)

	# Upscaler weights handed to the texture pipeline.
	if not os.path.exists(os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth")):
	    hf_hub_download(
	        "dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir=CKPT_DIR
	    )

	# Inpainting weights handed to the texture pipeline. wget writes to a
	# temporary name that is renamed only after a successful download, so an
	# interrupted transfer cannot leave a truncated checkpoint that the
	# existence check above would accept on the next startup.
	_BIG_LAMA_PATH = os.path.join(CKPT_DIR, "big-lama.pt")
	if not os.path.exists(_BIG_LAMA_PATH):
	    _big_lama_part = _BIG_LAMA_PATH + ".part"
	    try:
	        subprocess.run(
	            [
	                "wget",
	                "-q",
	                "-O",
	                _big_lama_part,
	                "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt",
	            ],
	            check=True,
	        )
	        os.replace(_big_lama_part, _BIG_LAMA_PATH)
	    finally:
	        # Only still present when wget or the rename failed.
	        if os.path.exists(_big_lama_part):
	            os.remove(_big_lama_part)

	# These imports resolve through the sys.path entries added above, which is
	# why they cannot live at the top of the file.
	from image_process import prepare_image
	from briarmbg import BriaRMBG

	# Background-removal network, kept in inference mode on DEVICE.
	rmbg_net = BriaRMBG.from_pretrained(RMBG_CKPT)
	rmbg_net = rmbg_net.to(DEVICE).eval()

	from triposg.pipelines.pipeline_triposg import TripoSGPipeline

	# Image-to-shape pipeline, moved to DEVICE in half precision.
	triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_CKPT)
	triposg_pipe = triposg_pipe.to(DEVICE, DTYPE)

	# Number of views generated for texturing.
	NUM_VIEWS = 6
	from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
	from mvadapter.utils import get_orthogonal_camera, make_image_grid
	from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render

	# Multi-view generation pipeline: SDXL base model, fp16-fix VAE and the
	# MV-Adapter weights.
	mv_pipe = prepare_pipeline(
	    base_model="stabilityai/stable-diffusion-xl-base-1.0",
	    vae_model="madebyollin/sdxl-vae-fp16-fix",
	    unet_model=None,
	    lora_model=None,
	    adapter_path="huanngzh/mv-adapter",
	    scheduler=None,
	    num_views=NUM_VIEWS,
	    device=DEVICE,
	    dtype=DTYPE,
	)

	# Segmentation model used to cut the subject out of the reference image.
	birefnet = AutoModelForImageSegmentation.from_pretrained(
	    "ZhengPeng7/BiRefNet", trust_remote_code=True
	)
	birefnet = birefnet.to(DEVICE)

	# Preprocessing applied to images before they are fed to BiRefNet.
	transform_image = transforms.Compose(
	    [
	        transforms.Resize((1024, 1024)),
	        transforms.ToTensor(),
	        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
	    ]
	)


	def remove_bg_fn(x):
	    """Remove the background of image ``x`` with the module-level BiRefNet."""
	    return remove_bg(x, birefnet, transform_image, DEVICE)


	# ── UniRig helper ─────────────────────────────────────────────────────────────
	def _run_unirig(glb_path: str, work_dir: str) -> str:
	env = os.environ.copy()
	env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

	def _launch(script, args):
	sh = os.path.join(UNIRIG_DIR, "launch", "inference", script)
	r = subprocess.run(
	["bash", sh] + args,
	cwd=UNIRIG_DIR,
	capture_output=True,
	text=True,
	timeout=300,
	env=env,
	)
	if r.returncode != 0:
	raise RuntimeError(f"UniRig {script} failed:\n{r.stderr[-1000:]}")

	skel = os.path.join(work_dir, "skeleton.fbx")
	skin = os.path.join(work_dir, "skin.fbx")
	out = os.path.join(work_dir, "rigged.glb")

	_launch("generate_skeleton.sh", ["--input", glb_path, "--output", skel])
	_launch("generate_skin.sh", ["--input", skel, "--output", skin])
	_launch("merge.sh", ["--source", skin, "--target", glb_path, "--output", out])

	if not os.path.exists(out):
	fallback = "/tmp/rig_out/rigged.glb"
	if os.path.exists(fallback):
	shutil.copy2(fallback, out)
	else:
	raise RuntimeError("UniRig produced no output")
	return out


	# ── Main single-pass function ─────────────────────────────────────────────────
	def _transplant_pshuman_face(image_path, rigged_path, save_dir, hex_id):
	    """Run PSHuman on the portrait and graft the resulting face onto the avatar.

	    Args:
	        image_path: Path of the uploaded portrait.
	        rigged_path: GLB that receives the face.
	        save_dir: Session directory; PSHuman files go to ``save_dir/pshuman``.
	        hex_id: Run identifier used in the output file name.

	    Returns:
	        Path of the new ``avatar_<hex_id>.glb``, or ``rigged_path`` unchanged
	        when PSHuman produced no ``.obj`` mesh.

	    Raises:
	        Exception: any failure of the stage; the caller treats the stage as
	            optional and keeps ``rigged_path`` in that case.
	    """
	    from torchvision.transforms.functional import normalize as _norm

	    pshuman_work = os.path.join(save_dir, "pshuman")
	    os.makedirs(pshuman_work, exist_ok=True)
	    face_png = os.path.join(pshuman_work, "face.png")

	    # Decode the portrait once. Forcing RGB keeps the three-channel
	    # normalisation below valid for RGBA, greyscale and palette uploads.
	    with Image.open(image_path) as src:
	        rgb = src.convert("RGB")
	        rgba = src.convert("RGBA")

	    # RMBG mask for portrait
	    img_t = transforms.ToTensor()(rgb.resize((1024, 1024))).unsqueeze(0).to(DEVICE)
	    img_norm = _norm(img_t, [0.5] * 3, [1.0] * 3)
	    with torch.no_grad():
	        # NOTE(review): if this BriaRMBG build already returns sigmoid-activated
	        # maps, the extra .sigmoid() compresses the mask range - confirm.
	        mask = rmbg_net(img_norm)[0][0].sigmoid()[0, 0].cpu().numpy()
	    mask_pil = Image.fromarray((mask * 255).astype(np.uint8)).resize(rgb.size)
	    rgba.putalpha(mask_pil)
	    rgba.save(face_png)

	    pshuman_out = os.path.join(pshuman_work, "out")
	    cfg = os.path.join(PSHUMAN_DIR, "configs/inference-768-6view.yaml")
	    result = subprocess.run(
	        [
	            sys.executable,
	            os.path.join(PSHUMAN_DIR, "inference.py"),
	            "--config",
	            cfg,
	            "--data_dir",
	            pshuman_work,
	            "--case_name",
	            "face",
	            "--output_dir",
	            pshuman_out,
	            "--pretrained_model_name_or_path",
	            PSHUMAN_CKPT,
	        ],
	        capture_output=True,
	        text=True,
	        timeout=300,
	    )
	    if result.returncode != 0:
	        # Surface the failure instead of discarding it; the search below
	        # still runs in case a mesh was written before the error.
	        print(
	            f"[PSHuman] inference exited with code {result.returncode}:\n"
	            f"{result.stderr[-1000:]}"
	        )

	    face_obj = None
	    if os.path.isdir(pshuman_out):
	        # Sorted so the chosen mesh does not depend on directory order.
	        face_obj = next(
	            (
	                os.path.join(pshuman_out, f)
	                for f in sorted(os.listdir(pshuman_out))
	                if f.endswith(".obj")
	            ),
	            None,
	        )
	    if face_obj is None:
	        return rigged_path

	    # Register the pipeline directory once instead of on every request.
	    pipeline_dir = os.path.join(ROOT, "..", "pipeline")
	    if pipeline_dir not in sys.path:
	        sys.path.insert(0, pipeline_dir)
	    from face_transplant import transplant_face

	    avatar_path = os.path.join(save_dir, f"avatar_{hex_id}.glb")
	    transplant_face(
	        body_glb_path=rigged_path,
	        pshuman_mesh_path=face_obj,
	        output_path=avatar_path,
	        weight_threshold=0.5,
	        retract_mm=2.0,
	    )
	    if not os.path.exists(avatar_path):
	        raise RuntimeError("face transplant produced no output")
	    return avatar_path


	@spaces.GPU(duration=600)
	@torch.no_grad()
	def generate_avatar(image_path: str, seed: int, req: gr.Request):
	    """Turn a portrait into a textured and, where possible, rigged avatar GLB.

	    Stages: (1) segment the portrait and generate a mesh with TripoSG,
	    (2) generate multi-view images and bake them into a texture, (3) rig the
	    mesh with UniRig, (4) replace the face using PSHuman. Stages 3 and 4 are
	    best effort: on failure a message is printed and the result of the
	    previous stage is kept.

	    Args:
	        image_path: Path of the uploaded portrait.
	        seed: Seed for both diffusion generators.
	        req: Gradio request; its ``session_hash`` selects the output directory.

	    Returns:
	        Path of the final GLB inside the session directory.

	    Raises:
	        gr.Error: if no portrait was provided.
	    """
	    if not image_path:
	        raise gr.Error("Please upload a portrait photo first.")
	    # manual_seed needs an int even if the UI delivers the value as a float.
	    seed = int(seed)

	    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
	    os.makedirs(save_dir, exist_ok=True)

	    # ── 1. Segment + shape ────────────────────────────────────────────────────
	    image_seg = prepare_image(
	        image_path, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net
	    )
	    outputs = triposg_pipe(
	        image=image_seg,
	        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
	        num_inference_steps=50,
	        guidance_scale=7.5,
	    ).samples[0]

	    mesh = trimesh.Trimesh(
	        outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1])
	    )
	    from utils import simplify_mesh

	    mesh = simplify_mesh(mesh, 100000)

	    # Random suffix keeps the files of repeated runs in one session apart.
	    hex_id = os.urandom(4).hex()
	    shape_path = os.path.join(save_dir, f"shape_{hex_id}.glb")
	    mesh.export(shape_path)
	    torch.cuda.empty_cache()

	    # ── 2. Texture ────────────────────────────────────────────────────────────
	    height = width = 768
	    # One azimuth per view; shared by the render cameras and the texture
	    # projection so that both always agree.
	    view_azimuth_deg = tuple(x - 90 for x in (0, 90, 180, 270, 180, 180))
	    cameras = get_orthogonal_camera(
	        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
	        distance=[1.8] * NUM_VIEWS,
	        left=-0.55,
	        right=0.55,
	        bottom=-0.55,
	        top=0.55,
	        azimuth_deg=list(view_azimuth_deg),
	        device=DEVICE,
	    )
	    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
	    mesh_loaded = load_mesh(shape_path, rescale=True, device=DEVICE)
	    render_out = render(
	        ctx,
	        mesh_loaded,
	        cameras,
	        height=height,
	        width=width,
	        render_attr=False,
	        normal_background=0.0,
	    )
	    # Position and normal renders, each mapped into [0, 1], are concatenated
	    # on the last axis and that axis is then moved to position 1.
	    control_images = (
	        torch.cat(
	            [
	                (render_out.pos + 0.5).clamp(0, 1),
	                (render_out.normal / 2 + 0.5).clamp(0, 1),
	            ],
	            dim=-1,
	        )
	        .permute(0, 3, 1, 2)
	        .to(DEVICE)
	    )

	    ref_image = Image.open(image_path)
	    ref_image = remove_bg_fn(ref_image)
	    ref_image = preprocess_image(ref_image, height, width)

	    gen = torch.Generator(device=DEVICE).manual_seed(seed)
	    mv_images = mv_pipe(
	        "high quality",
	        height=height,
	        width=width,
	        num_inference_steps=15,
	        guidance_scale=3.0,
	        num_images_per_prompt=NUM_VIEWS,
	        control_image=control_images,
	        control_conditioning_scale=1.0,
	        reference_image=ref_image,
	        reference_conditioning_scale=1.0,
	        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
	        cross_attention_kwargs=None,
	        generator=gen,
	    ).images
	    torch.cuda.empty_cache()

	    mv_path = os.path.join(save_dir, f"mv_{hex_id}.png")
	    make_image_grid(mv_images, rows=1).save(mv_path)

	    # The texture module is a downloaded binary extension, so it can only be
	    # imported after it has been fetched.
	    ensure_texture_extension()
	    from texture import TexturePipeline, ModProcessConfig

	    tex_pipe = TexturePipeline(
	        upscaler_ckpt_path=os.path.join(CKPT_DIR, "RealESRGAN_x2plus.pth"),
	        inpaint_ckpt_path=os.path.join(CKPT_DIR, "big-lama.pt"),
	        device=DEVICE,
	    )
	    textured_path = tex_pipe(
	        mesh_path=shape_path,
	        save_dir=save_dir,
	        save_name=f"textured_{hex_id}.glb",
	        uv_unwarp=True,
	        uv_size=4096,
	        rgb_path=mv_path,
	        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
	        camera_azimuth_deg=list(view_azimuth_deg),
	    )

	    # ── 3. UniRig ─────────────────────────────────────────────────────────────
	    try:
	        rigged_path = _run_unirig(textured_path, save_dir)
	    except Exception as e:
	        print(f"[UniRig] skipped: {e}")
	        rigged_path = textured_path

	    # ── 4. PSHuman face ───────────────────────────────────────────────────────
	    # final_path is only replaced once the helper has returned successfully,
	    # so a failed transplant can never yield a path to a missing file.
	    final_path = rigged_path
	    try:
	        final_path = _transplant_pshuman_face(
	            image_path, rigged_path, save_dir, hex_id
	        )
	    except Exception as e:
	        print(f"[PSHuman] skipped: {e}")

	    return final_path


	# ── UI ────────────────────────────────────────────────────────────────────────
	def start_session(req: gr.Request):
	    """Create the scratch directory of the requesting session under TMP_DIR."""
	    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
	    os.makedirs(session_dir, exist_ok=True)


	def end_session(req: gr.Request):
	    """Delete the scratch directory of the requesting session, if it exists."""
	    d = os.path.join(TMP_DIR, str(req.session_hash))
	    try:
	        shutil.rmtree(d)
	    except FileNotFoundError:
	        # Never created or already removed; attempting the removal directly
	        # avoids the gap between an existence check and the delete.
	        pass


	def get_seed(randomize, seed):
	    """Return a random seed in [0, MAX_SEED] if ``randomize`` is truthy, else ``seed``."""
	    if randomize:
	        return random.randint(0, MAX_SEED)
	    return seed


	with gr.Blocks(title="MeshForge") as demo:
	    # Heading and short usage note shown at the top of the page.
	    intro_text = (
	        "# 🧊 MeshForge\n"
	        "### Portrait → Textured 3D Mesh → Rigged Avatar\n"
	        "Upload a portrait photo and click Generate Avatar. "
	        "The full pipeline (shape → texture → rig → face) runs in one pass."
	    )
	    gr.Markdown(intro_text)

	    with gr.Row():
	        # Left column: inputs.
	        with gr.Column(scale=1):
	            portrait = gr.Image(label="Portrait Photo", type="filepath")
	            seed = gr.Slider(0, MAX_SEED, value=0, step=1, label="Seed")
	            randomize = gr.Checkbox(True, label="Randomize seed")
	            btn = gr.Button("Generate Avatar", variant="primary")
	        # Right column: 3D preview and download of the result.
	        with gr.Column(scale=1):
	            output_3d = gr.Model3D(label="Rigged Avatar")
	            output_dl = gr.File(label="Download GLB")

	    def _mirror_to_download(path):
	        """Pass the generated model path through to the download widget."""
	        return path

	    # Click chain: settle the seed, run the pipeline, then copy the result
	    # into the download component.
	    seed_ready = btn.click(get_seed, inputs=[randomize, seed], outputs=[seed])
	    avatar_ready = seed_ready.then(
	        generate_avatar,
	        inputs=[portrait, seed],
	        outputs=[output_3d],
	    )
	    avatar_ready.then(_mirror_to_download, inputs=output_3d, outputs=output_dl)

	    # Per-session scratch directory: created on page load, removed on unload.
	    demo.load(start_session)
	    demo.unload(end_session)

	demo.launch()