PartPacker-CPU

Sleeping

App Files Files Community

cpuai committed 6 days ago

Commit

0846da3

verified ·

1 Parent(s): 377b8f0

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -189

app.py CHANGED Viewed

@@ -9,10 +9,8 @@ from datetime import datetime
 import gradio as gr
 try:
-    # Hugging Face Spaces 环境
     import spaces
 except ImportError:
-    # 本地运行时，提供一个空装饰器兼容
     class spaces:
         class GPU:
             def __init__(self, duration=60):
@@ -26,38 +24,31 @@ from flow.model import Model
 from flow.configs.schema import ModelConfig
 from flow.utils import get_random_color, recenter_foreground
 from vae.utils import postprocess_mesh
-# 下载模型权重
 from huggingface_hub import hf_hub_download
 # =========================
-# CPU 运行参数
 # =========================
 DEVICE = torch.device("cpu")
 DTYPE = torch.float32
-# 可根据 Hugging Face CPU 空间资源调整线程数
-# 一般 2~4 比较稳，过高不一定更快
 CPU_THREADS = int(os.environ.get("CPU_THREADS", "2"))
 torch.set_num_threads(CPU_THREADS)
-torch.set_num_interop_threads(max(1, min(CPU_THREADS, 2)))
-# 为了减少 CPU 空间内存压力，允许通过环境变量控制
-DEFAULT_NUM_STEPS = int(os.environ.get("DEFAULT_NUM_STEPS", "20"))
-DEFAULT_GRID_RES = int(os.environ.get("DEFAULT_GRID_RES", "256"))
-DEFAULT_CFG_SCALE = float(os.environ.get("DEFAULT_CFG_SCALE", "7.0"))
-flow_ckpt_path = hf_hub_download(repo_id="nvidia/PartPacker", filename="flow.pt")
-vae_ckpt_path = hf_hub_download(repo_id="nvidia/PartPacker", filename="vae.pt")
 TRIMESH_GLB_EXPORT = np.array(
     [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
     dtype=np.float32
 )
 MAX_SEED = np.iinfo(np.int32).max
 bg_remover = rembg.new_session()
 # =========================
 # 模型配置
 # =========================
@@ -77,32 +68,58 @@ model_config = ModelConfig(
 )
 # =========================
-# 初始化模型（CPU）
 # =========================
-print("正在加载模型到 CPU...")
-model = Model(model_config).eval().to(DEVICE, dtype=DTYPE)
-# 显式使用 CPU 加载权重
 ckpt_dict = torch.load(flow_ckpt_path, map_location=DEVICE, weights_only=True)
 model.load_state_dict(ckpt_dict, strict=True)
 print("模型加载完成。")
 def get_random_seed(randomize_seed, seed):
-    """
-    获取随机种子。
-    """
     if randomize_seed:
         seed = np.random.randint(0, MAX_SEED)
-    return seed
 def process_image(image_path):
     """
     处理输入图片：
-    1. 读取图片
-    2. 如果没有 alpha 通道则自动抠图
-    3. 将主体重新居中
     4. 缩放到模型输入尺寸
     """
     if image_path is None:
@@ -113,15 +130,13 @@ def process_image(image_path):
         raise gr.Error("图片读取失败，请上传有效图片。")
     if image.ndim == 2:
-        # 灰度图转 RGBA
         image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGBA)
     if image.shape[-1] == 4:
         image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
     else:
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        # 没有 alpha 通道时自动去背景
-        image = rembg.remove(image, session=bg_remover)  # [H, W, 4]
     mask = image[..., -1] > 0
     image = recenter_foreground(image, mask, border_ratio=0.1)
@@ -131,55 +146,71 @@ def process_image(image_path):
 def process_3d(
     input_image,
-    num_steps=DEFAULT_NUM_STEPS,
-    cfg_scale=DEFAULT_CFG_SCALE,
-    grid_res=DEFAULT_GRID_RES,
     seed=42,
-    simplify_mesh=False,
-    target_num_faces=50000
 ):
     """
-    在 CPU 上执行 3D 生成。
-    注意：
-    - CPU 推理会很慢
-    - 建议降低 num_steps 和 grid_res
     """
     if input_image is None:
-        raise gr.Error("请先处理并确认输入图片。")
     try:
-        # 设置随机种子
-        kiui.seed_everything(seed)
-        # 输出目录
         os.makedirs("output", exist_ok=True)
         output_glb_path = f"output/partpacker_{datetime.now().strftime('%Y%m%d_%H%M%S')}.glb"
-        # 输入图像：RGBA uint8 -> float32
         image = input_image.astype(np.float32) / 255.0
-        # 将透明背景混合到白底
-        image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
         image_tensor = (
             torch.from_numpy(image)
             .permute(2, 0, 1)
             .contiguous()
             .unsqueeze(0)
-            .to(DEVICE, dtype=DTYPE)
         )
-        data = {"cond_images": image_tensor}
-        # 主模型推理
         with torch.inference_mode():
-            results = model(data, num_steps=int(num_steps), cfg_scale=float(cfg_scale))
         latent = results["latent"]
-        # 切分两个 part
-        data_part0 = {"latent": latent[:, : model.config.latent_size, :]}
-        data_part1 = {"latent": latent[:, model.config.latent_size:, :]}
         with torch.inference_mode():
             results_part0 = model.vae(data_part0, resolution=int(grid_res))
@@ -190,14 +221,12 @@ def process_3d(
         parts = []
-        # 处理第一部分 mesh
         vertices, faces = results_part0["meshes"][0]
         mesh_part0 = trimesh.Trimesh(vertices, faces, process=False)
         mesh_part0.vertices = mesh_part0.vertices @ TRIMESH_GLB_EXPORT.T
         mesh_part0 = postprocess_mesh(mesh_part0, int(target_num_faces))
         parts.extend(mesh_part0.split(only_watertight=False))
-        # 处理第二部分 mesh
         vertices, faces = results_part1["meshes"][0]
         mesh_part1 = trimesh.Trimesh(vertices, faces, process=False)
         mesh_part1.vertices = mesh_part1.vertices @ TRIMESH_GLB_EXPORT.T
@@ -205,13 +234,11 @@ def process_3d(
         parts.extend(mesh_part1.split(only_watertight=False))
         if len(parts) == 0:
-            raise gr.Error("模型生成失败：没有得到有效网格。你可以尝试更换图片或降低参数。")
-        # 给不同 part 赋不同颜色
         for j, part in enumerate(parts):
             part.visual.vertex_colors = get_random_color(j, use_float=True)
-        # 导出为 GLB
         scene = trimesh.Scene(parts)
         scene.export(output_glb_path)
@@ -219,167 +246,92 @@ def process_3d(
     except Exception as e:
         raise gr.Error(
-            f"CPU 生成失败：{str(e)}\n\n"
-            f"建议尝试：\n"
-            f"1. 将 Inference Steps 降到 10~20\n"
-            f"2. 将 Grid Resolution 降到 256\n"
-            f"3. 勾选 Simplify Mesh\n"
-            f"4. 使用主体清晰、背景简单的图片"
         )
-# =========================
-# Gradio UI
-# =========================
-_TITLE = "🎨 Image to 3D Model - CPU Version for Hugging Face Spaces"
 _DESCRIPTION = """
-<div style="text-align: center; margin-bottom: 20px;">
-    <h3 style="color: #2e7d32;">✨ CPU 版本：将 2D 图片转换为 3D 模型 ✨</h3>
-</div>
-### 🚀 说明：
-这是一个适配 **Hugging Face CPU Space** 的版本，已经移除了 GPU 强依赖。
-### ⚠️ 注意：
-- CPU 推理会明显比 GPU 慢很多
-- 建议使用较低参数，避免超时或内存不足
-- 推荐从默认参数开始测试
-### 📖 使用方法：
-1. 上传图片
-2. 可选调整参数
-3. 点击生成
-4. 等待生成 GLB 模型
-### 💡 CPU 环境建议：
-- Inference Steps：建议 10~20
-- Grid Resolution：建议 256
-- 建议勾选 Simplify Mesh
 """
-block = gr.Blocks(title=_TITLE).queue(max_size=4)
 with block:
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("# " + _TITLE)
     gr.Markdown(_DESCRIPTION)
     with gr.Row():
-        with gr.Column(scale=1):
-            with gr.Row():
-                input_image = gr.Image(
-                    label="📷 Upload Image",
-                    type="filepath"
-                )
-                seg_image = gr.Image(
-                    label="🔍 Processed Image",
-                    type="numpy",
-                    interactive=False,
-                    image_mode="RGBA"
-                )
-            with gr.Accordion("⚙️ Advanced Settings", open=False):
-                gr.Markdown("""
-### 参数说明（CPU 推荐）：
-- **Inference Steps**：步数越多越慢，CPU 建议 10~20
-- **CFG Scale**：控制生成贴合程度
-- **Grid Resolution**：越高越精细，但 CPU 更慢、更吃内存
-- **Random Seed**：固定后可复现结果
-- **Simplify Mesh**：建议开启，减少面数
-                """)
                 num_steps = gr.Slider(
                     label="Inference Steps",
                     minimum=1,
-                    maximum=50,
                     step=1,
-                    value=DEFAULT_NUM_STEPS,
-                    info="CPU 推荐：10~20"
                 )
                 cfg_scale = gr.Slider(
                     label="CFG Scale",
                     minimum=2.0,
                     maximum=10.0,
                     step=0.1,
-                    value=DEFAULT_CFG_SCALE,
-                    info="推荐：6~8"
                 )
                 input_grid_res = gr.Slider(
                     label="Grid Resolution",
-                    minimum=128,
-                    maximum=384,
                     step=1,
-                    value=DEFAULT_GRID_RES,
-                    info="CPU 推荐：256"
                 )
                 with gr.Row():
-                    randomize_seed = gr.Checkbox(
-                        label="Randomize Seed",
-                        value=True,
-                        info="每次使用不同种子"
-                    )
-                    seed = gr.Slider(
-                        label="Seed Value",
-                        minimum=0,
-                        maximum=MAX_SEED,
-                        step=1,
-                        value=0
-                    )
                 with gr.Row():
-                    simplify_mesh = gr.Checkbox(
-                        label="Simplify Mesh",
-                        value=True,
-                        info="CPU 环境建议开启"
-                    )
                     target_num_faces = gr.Slider(
-                        label="Target Face Count",
                         minimum=5000,
-                        maximum=100000,
                         step=1000,
-                        value=50000,
-                        info="越低越轻量"
                     )
-            button_gen = gr.Button("🎯 Generate 3D Model", variant="primary", size="lg")
-        with gr.Column(scale=1):
-            output_model = gr.Model3D(
-                label="🎭 3D Model Preview",
-                height=512
-            )
-            gr.Markdown("""
-### 📌 预览操作：
-- 🖱️ 左键拖动：旋转
-- 🖱️ 右键拖动：平移
-- 🖱️ 滚轮：缩放
-- 📥 可下载生成的 GLB 文件
-            """)
     with gr.Row():
-        gr.Markdown("### 🖼️ Example Images (Click to Try):")
         gr.Examples(
             examples=[
                 ["examples/rabbit.png"],
                 ["examples/robot.png"],
                 ["examples/teapot.png"],
-                ["examples/barrel.png"],
-                ["examples/cactus.png"],
-                ["examples/cyan_car.png"],
-                ["examples/pickup.png"],
-                ["examples/swivelchair.png"],
-                ["examples/warhammer.png"],
             ],
             fn=process_image,
             inputs=[input_image],
@@ -387,18 +339,6 @@ with block:
             cache_examples=False
         )
-    gr.Markdown("""
----
-### ⚠️ Important Notes:
-- 这是 CPU 版，速度会比较慢
-- 若 Hugging Face CPU Space 配置较低，可能会出现超时或内存不足
-- 最适合主体清晰、背景简单的图片
-- 如果失败，请先降低参数再试
-### 🤝 Technical Support:
-Powered by NVIDIA PartPacker technology.
-    """)
     button_gen.click(
         fn=process_image,
         inputs=[input_image],

 import gradio as gr
 try:
     import spaces
 except ImportError:
     class spaces:
         class GPU:
             def __init__(self, duration=60):
 from flow.configs.schema import ModelConfig
 from flow.utils import get_random_color, recenter_foreground
 from vae.utils import postprocess_mesh
 from huggingface_hub import hf_hub_download
 # =========================
+# CPU 基础设置
 # =========================
 DEVICE = torch.device("cpu")
 DTYPE = torch.float32
 CPU_THREADS = int(os.environ.get("CPU_THREADS", "2"))
 torch.set_num_threads(CPU_THREADS)
+torch.set_num_interop_threads(max(1, min(2, CPU_THREADS)))
 TRIMESH_GLB_EXPORT = np.array(
     [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
     dtype=np.float32
 )
 MAX_SEED = np.iinfo(np.int32).max
 bg_remover = rembg.new_session()
+# =========================
+# 下载模型
+# =========================
+flow_ckpt_path = hf_hub_download(repo_id="nvidia/PartPacker", filename="flow.pt")
+vae_ckpt_path = hf_hub_download(repo_id="nvidia/PartPacker", filename="vae.pt")
 # =========================
 # 模型配置
 # =========================
 )
 # =========================
+# 工具函数：强制整个模块转 float32
+# =========================
+def force_module_fp32(module: torch.nn.Module):
+    """
+    递归把模块参数和 buffer 全部转成 float32。
+    这一步是解决 CPU 下 bfloat16/float32 混用问题的关键。
+    """
+    module.to(device=DEVICE)
+    module.float()
+    for child in module.children():
+        force_module_fp32(child)
+    for name, buf in module.named_buffers(recurse=False):
+        if torch.is_floating_point(buf):
+            setattr(module, name, buf.to(device=DEVICE, dtype=torch.float32))
+    return module
+# =========================
+# 初始化模型（CPU + float32）
 # =========================
+print("正在加载模型到 CPU ...")
+model = Model(model_config)
+model.eval()
+model.to(DEVICE)
+# 显式按 CPU 加载权重
 ckpt_dict = torch.load(flow_ckpt_path, map_location=DEVICE, weights_only=True)
 model.load_state_dict(ckpt_dict, strict=True)
+# 关键：再次强制整个模型为 float32
+force_module_fp32(model)
+model.eval()
 print("模型加载完成。")
+print("主模型 dtype:", next(model.parameters()).dtype)
 def get_random_seed(randomize_seed, seed):
     if randomize_seed:
         seed = np.random.randint(0, MAX_SEED)
+    return int(seed)
 def process_image(image_path):
     """
     处理输入图片：
+    1. 读图
+    2. 没有 alpha 就自动去背景
+    3. 主体居中
     4. 缩放到模型输入尺寸
     """
     if image_path is None:
         raise gr.Error("图片读取失败，请上传有效图片。")
     if image.ndim == 2:
         image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGBA)
     if image.shape[-1] == 4:
         image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
     else:
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        image = rembg.remove(image, session=bg_remover)
     mask = image[..., -1] > 0
     image = recenter_foreground(image, mask, border_ratio=0.1)
 def process_3d(
     input_image,
+    num_steps=10,
+    cfg_scale=7.0,
+    grid_res=128,
     seed=42,
+    simplify_mesh=True,
+    target_num_faces=20000
 ):
     """
+    CPU 版 3D 生成
     """
     if input_image is None:
+        raise gr.Error("请先上传并处理图片。")
     try:
+        kiui.seed_everything(int(seed))
         os.makedirs("output", exist_ok=True)
         output_glb_path = f"output/partpacker_{datetime.now().strftime('%Y%m%d_%H%M%S')}.glb"
+        # RGBA -> float32
         image = input_image.astype(np.float32) / 255.0
+        image = image[..., :3] * image[..., 3:4] + (1.0 - image[..., 3:4])
         image_tensor = (
             torch.from_numpy(image)
             .permute(2, 0, 1)
             .contiguous()
             .unsqueeze(0)
+            .to(device=DEVICE, dtype=torch.float32)
         )
+        data = {
+            "cond_images": image_tensor.float()
+        }
+        # 再保险：推理前确保模型仍是 float32
+        force_module_fp32(model)
+        model.eval()
         with torch.inference_mode():
+            results = model(
+                data,
+                num_steps=int(num_steps),
+                cfg_scale=float(cfg_scale)
+            )
         latent = results["latent"]
+        # 关键：latent 强制 float32
+        if isinstance(latent, torch.Tensor):
+            latent = latent.to(device=DEVICE, dtype=torch.float32).contiguous()
+        else:
+            raise gr.Error("模型输出 latent 异常。")
+        # VAE 输入前再做 float32 保证
+        data_part0 = {
+            "latent": latent[:, : model.config.latent_size, :].float().contiguous()
+        }
+        data_part1 = {
+            "latent": latent[:, model.config.latent_size:, :].float().contiguous()
+        }
+        # 再保险：把 VAE 也强制成 float32
+        force_module_fp32(model.vae)
+        model.vae.eval()
         with torch.inference_mode():
             results_part0 = model.vae(data_part0, resolution=int(grid_res))
         parts = []
         vertices, faces = results_part0["meshes"][0]
         mesh_part0 = trimesh.Trimesh(vertices, faces, process=False)
         mesh_part0.vertices = mesh_part0.vertices @ TRIMESH_GLB_EXPORT.T
         mesh_part0 = postprocess_mesh(mesh_part0, int(target_num_faces))
         parts.extend(mesh_part0.split(only_watertight=False))
         vertices, faces = results_part1["meshes"][0]
         mesh_part1 = trimesh.Trimesh(vertices, faces, process=False)
         mesh_part1.vertices = mesh_part1.vertices @ TRIMESH_GLB_EXPORT.T
         parts.extend(mesh_part1.split(only_watertight=False))
         if len(parts) == 0:
+            raise gr.Error("没有生成有效网格，请换一张更清晰、背景更简单的图片。")
         for j, part in enumerate(parts):
             part.visual.vertex_colors = get_random_color(j, use_float=True)
         scene = trimesh.Scene(parts)
         scene.export(output_glb_path)
     except Exception as e:
         raise gr.Error(
+            "CPU 生成失败："
+            + str(e)
+            + "\n\n建议：\n"
+              "1. Inference Steps 先设为 10\n"
+              "2. Grid Resolution 先设为 128\n"
+              "3. 勾选 Simplify Mesh\n"
+              "4. Target Face Count 设为 20000\n"
+              "5. 使用主体清晰、背景简单的 PNG 图片"
         )
+_TITLE = "🎨 Image to 3D Model - CPU Version"
 _DESCRIPTION = """
+### CPU 版说明
+这是适配 Hugging Face CPU Space 的版本。
+### 建议参数
+- Inference Steps：10
+- CFG Scale：7.0
+- Grid Resolution：128
+- Simplify Mesh：开启
+- Target Face Count：20000
+### 注意
+该模型原本更适合 GPU，CPU 下会比较慢。
 """
+block = gr.Blocks(title=_TITLE).queue(max_size=2)
 with block:
+    gr.Markdown("# " + _TITLE)
     gr.Markdown(_DESCRIPTION)
     with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(label="上传图片", type="filepath")
+            seg_image = gr.Image(label="处理后图片", type="numpy", interactive=False, image_mode="RGBA")
+            with gr.Accordion("高级设置", open=False):
                 num_steps = gr.Slider(
                     label="Inference Steps",
                     minimum=1,
+                    maximum=30,
                     step=1,
+                    value=10
                 )
                 cfg_scale = gr.Slider(
                     label="CFG Scale",
                     minimum=2.0,
                     maximum=10.0,
                     step=0.1,
+                    value=7.0
                 )
                 input_grid_res = gr.Slider(
                     label="Grid Resolution",
+                    minimum=64,
+                    maximum=256,
                     step=1,
+                    value=128
                 )
                 with gr.Row():
+                    randomize_seed = gr.Checkbox(label="随机种子", value=True)
+                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                 with gr.Row():
+                    simplify_mesh = gr.Checkbox(label="简化网格", value=True)
                     target_num_faces = gr.Slider(
+                        label="目标面数",
                         minimum=5000,
+                        maximum=50000,
                         step=1000,
+                        value=20000
                     )
+            button_gen = gr.Button("生成 3D 模型", variant="primary")
+        with gr.Column():
+            output_model = gr.Model3D(label="3D 预览", height=512)
     with gr.Row():
         gr.Examples(
             examples=[
                 ["examples/rabbit.png"],
                 ["examples/robot.png"],
                 ["examples/teapot.png"],
             ],
             fn=process_image,
             inputs=[input_image],
             cache_examples=False
         )
     button_gen.click(
         fn=process_image,
         inputs=[input_image],