Spaces:
Running
Running
LTX 2.3 CPU Space: 10Eros + cond_safe distill via ComfyUI GGUF
Browse files- Dockerfile +40 -3
- README.md +32 -47
- app.py +856 -49
- assets/videos/.gitkeep +0 -0
Dockerfile
CHANGED
|
@@ -3,15 +3,52 @@ FROM python:3.12-slim
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
-
curl libgl1 libglib2.0-0
|
| 7 |
&& rm -rf /var/lib/apt/lists/*
|
| 8 |
|
| 9 |
RUN pip install --no-cache-dir \
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
COPY app.py /app/app.py
|
| 13 |
COPY README.md /app/README.md
|
| 14 |
|
| 15 |
EXPOSE 7860
|
| 16 |
|
| 17 |
-
CMD ["python", "/app/app.py"]
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 6 |
+
git curl ffmpeg libgl1 libglib2.0-0 \
|
| 7 |
&& rm -rf /var/lib/apt/lists/*
|
| 8 |
|
| 9 |
RUN pip install --no-cache-dir \
|
| 10 |
+
--extra-index-url https://download.pytorch.org/whl/cpu \
|
| 11 |
+
"torch>=2.7" "torchvision" "torchaudio"
|
| 12 |
+
|
| 13 |
+
RUN git clone --depth 1 https://github.com/comfyanonymous/ComfyUI.git /app/ComfyUI \
|
| 14 |
+
&& pip install --no-cache-dir -r /app/ComfyUI/requirements.txt
|
| 15 |
+
|
| 16 |
+
RUN git clone --depth 1 https://github.com/city96/ComfyUI-GGUF.git \
|
| 17 |
+
/app/ComfyUI/custom_nodes/ComfyUI-GGUF \
|
| 18 |
+
&& pip install --no-cache-dir -r /app/ComfyUI/custom_nodes/ComfyUI-GGUF/requirements.txt
|
| 19 |
+
|
| 20 |
+
RUN git clone --depth 1 https://github.com/Lightricks/ComfyUI-LTXVideo.git \
|
| 21 |
+
/app/ComfyUI/custom_nodes/ComfyUI-LTXVideo \
|
| 22 |
+
&& (cd /app/ComfyUI/custom_nodes/ComfyUI-LTXVideo && pip install --no-cache-dir -r requirements.txt 2>/dev/null || true)
|
| 23 |
+
|
| 24 |
+
RUN git clone --depth 1 https://github.com/kijai/ComfyUI-KJNodes.git \
|
| 25 |
+
/app/ComfyUI/custom_nodes/ComfyUI-KJNodes \
|
| 26 |
+
&& (cd /app/ComfyUI/custom_nodes/ComfyUI-KJNodes && pip install --no-cache-dir -r requirements.txt 2>/dev/null || true)
|
| 27 |
+
|
| 28 |
+
RUN pip install --no-cache-dir "gradio>=6,<7" huggingface_hub psutil websocket-client opencv-python-headless
|
| 29 |
+
|
| 30 |
+
RUN mkdir -p /app/ComfyUI/models/diffusion_models \
|
| 31 |
+
/app/ComfyUI/models/text_encoders \
|
| 32 |
+
/app/ComfyUI/models/vae \
|
| 33 |
+
/app/ComfyUI/models/loras \
|
| 34 |
+
/app/ComfyUI/output \
|
| 35 |
+
/app/assets/videos
|
| 36 |
+
|
| 37 |
+
RUN hf download vantagewithai/LTX2.3-10Eros-GGUF 10Eros_v1-Q3_K_M.gguf --local-dir /tmp/hf_dl \
|
| 38 |
+
&& hf download TenStrip/LTX2.3_Distilled_Lora_1.1_Experiments ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors --local-dir /tmp/hf_dl \
|
| 39 |
+
&& hf download mradermacher/gemma-3-12b-it-qat-abliterated-GGUF gemma-3-12b-it-qat-abliterated.Q3_K_M.gguf --local-dir /tmp/hf_dl \
|
| 40 |
+
&& hf download Kijai/LTX2.3_comfy text_encoders/ltx-2.3_text_projection_bf16.safetensors vae/taeltx2_3.safetensors vae/LTX23_video_vae_bf16.safetensors vae/LTX23_audio_vae_bf16.safetensors --local-dir /tmp/hf_dl \
|
| 41 |
+
&& ln -s /tmp/hf_dl/10Eros_v1-Q3_K_M.gguf /app/ComfyUI/models/diffusion_models/10Eros_v1-Q3_K_M.gguf \
|
| 42 |
+
&& ln -s /tmp/hf_dl/ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors /app/ComfyUI/models/loras/ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors \
|
| 43 |
+
&& ln -s /tmp/hf_dl/gemma-3-12b-it-qat-abliterated.Q3_K_M.gguf /app/ComfyUI/models/text_encoders/gemma-3-12b-it-qat-abliterated.Q3_K_M.gguf \
|
| 44 |
+
&& ln -s /tmp/hf_dl/text_encoders/ltx-2.3_text_projection_bf16.safetensors /app/ComfyUI/models/text_encoders/ltx-2.3_text_projection_bf16.safetensors \
|
| 45 |
+
&& ln -s /tmp/hf_dl/vae/taeltx2_3.safetensors /app/ComfyUI/models/vae/taeltx2_3.safetensors \
|
| 46 |
+
&& ln -s /tmp/hf_dl/vae/LTX23_video_vae_bf16.safetensors /app/ComfyUI/models/vae/LTX23_video_vae_bf16.safetensors \
|
| 47 |
+
&& ln -s /tmp/hf_dl/vae/LTX23_audio_vae_bf16.safetensors /app/ComfyUI/models/vae/LTX23_audio_vae_bf16.safetensors
|
| 48 |
|
| 49 |
COPY app.py /app/app.py
|
| 50 |
COPY README.md /app/README.md
|
| 51 |
|
| 52 |
EXPOSE 7860
|
| 53 |
|
| 54 |
+
CMD ["python", "-u", "/app/app.py"]
|
README.md
CHANGED
|
@@ -1,82 +1,67 @@
|
|
| 1 |
---
|
| 2 |
title: LTX 2.3 CPU
|
| 3 |
emoji: 🎬
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
license: other
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
# LTX 2.3 CPU
|
| 13 |
|
| 14 |
-
|
| 15 |
|
| 16 |
-
##
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|---|---|---|---|
|
| 20 |
-
| Free CPU | 2 vCPU + 16 GB | ❌ models barely fit at Q3_K_M, ~60-120 min if it even completes | n/a |
|
| 21 |
-
| CPU Upgrade | 8 vCPU + 32 GB | ⚠ marginal, ~30-60 min | $0.30/clip |
|
| 22 |
-
| ZeroGPU | A100 quota slot | ✅ ~25-40 sec | free w/ Pro |
|
| 23 |
-
| GPU L40S | 48 GB VRAM | ✅ ~8 sec | $1/hr |
|
| 24 |
-
|
| 25 |
-
## Model paths analysed
|
| 26 |
-
|
| 27 |
-
- **Path A — Unsloth distilled-1.1 Q3_K_M** (`unsloth/LTX-2.3-GGUF` → `distilled-1.1/ltx-2.3-22b-distilled-1.1-Q3_K_M.gguf`, ~10.6 GB). Cleanest 8-step distilled DiT. Best CPU candidate (smallest weights). Requires ComfyUI-GGUF loader.
|
| 28 |
-
- **Path C — 10Eros fine-tune + cond_safe distill LoRA** (`vantagewithai/LTX2.3-10Eros-GGUF` + cond_safe LoRA). 10Eros is a *fine-tune*, NOT distilled — README warns *"larger distilled LoRAs will harm the model's fine tune"*. Riskier; needs LoRA tuning. Not a 1:1 replacement for Path A.
|
| 29 |
-
|
| 30 |
-
Recommendation: **Path A** for the CPU build (smallest, distilled). Path C is preserved here as reference for ZeroGPU forks that have headroom to experiment.
|
| 31 |
-
|
| 32 |
-
## Text encoder constraint
|
| 33 |
-
|
| 34 |
-
You **cannot swap** the text encoder. LTX 2.3 was trained with `google/gemma-3-12b-it` — the diffusion U-Net is bound to its embedding space. Smaller/newer LLMs like Qwen3.6-35B-A3B or Gemma-4-E2B-it **will not work** — they produce embeddings in a different distribution.
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
| 39 |
-
|
|
| 40 |
-
|
|
| 41 |
-
|
|
| 42 |
-
|
|
| 43 |
-
|
| 44 |
-
Use `mradermacher/gemma-3-12b-it-qat-abliterated-GGUF` Q3_K_M for the CPU path.
|
| 45 |
|
| 46 |
-
##
|
| 47 |
|
| 48 |
-
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
# Edit Space settings on HF UI -> Hardware -> ZeroGPU
|
| 54 |
-
```
|
| 55 |
|
| 56 |
-
## Curl test
|
| 57 |
|
| 58 |
```bash
|
| 59 |
TOKEN="hf_xxx"
|
| 60 |
-
SPACE="https://
|
| 61 |
|
| 62 |
EVT=$(curl -s -X POST "$SPACE/gradio_api/call/generate" \
|
| 63 |
-H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \
|
| 64 |
-
-d '{"data":["A woman walking through a neon-lit Tokyo alley
|
| 65 |
| python -c "import sys,json;print(json.load(sys.stdin)['event_id'])")
|
| 66 |
curl -sN "$SPACE/gradio_api/call/generate/$EVT" -H "Authorization: Bearer $TOKEN"
|
| 67 |
```
|
| 68 |
|
| 69 |
-
## Logs
|
| 70 |
|
| 71 |
```bash
|
| 72 |
curl -N -H "Authorization: Bearer $TOKEN" "https://huggingface.co/api/spaces/WeReCooking/ltx-2.3-cpu/logs/build"
|
| 73 |
curl -N -H "Authorization: Bearer $TOKEN" "https://huggingface.co/api/spaces/WeReCooking/ltx-2.3-cpu/logs/run"
|
| 74 |
```
|
| 75 |
|
| 76 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
-
- 10.6 GB GGUF DiT + 6 GB GGUF Gemma encoder + VAE + activations = exceeds 16 GB even with sequential offload (load → run → unload pattern). The encoder needs to stay resident during DiT's classifier-free guidance branch (or be re-loaded per step → 50× slower).
|
| 80 |
-
- 2 vCPU × 22B params at Q3_K_M ≈ ~120 sec/diffusion step → 8-step distilled = ~16 min just for the DiT loop, plus encode + VAE decode + offload swaps → realistically 60-90 min for a 2-sec, 384×256 clip. HF Space request timeout is 1 hour. The math doesn't close.
|
| 81 |
|
| 82 |
-
|
|
|
|
| 1 |
---
|
| 2 |
title: LTX 2.3 CPU
|
| 3 |
emoji: 🎬
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
license: other
|
| 10 |
+
short_description: 22B video diffusion on free CPU via GGUF + ComfyUI
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# LTX 2.3 CPU
|
| 14 |
|
| 15 |
+
**[LTX 2.3](https://huggingface.co/Lightricks/LTX-2.3) CPU**, `cond_safe` distill 1.1 + Sulphur-2 merge = [10Eros](https://huggingface.co/TenStrip/LTX2.3-10Eros).
|
| 16 |
|
| 17 |
+
## How it works
|
| 18 |
|
| 19 |
+
ComfyUI runs headless with `--cpu --force-fp32 --cache-none`. Models are GGUF (mmap'd from disk, per-layer dequant). Workflow matches RuneXX's proven LTX 2.3 GGUF pattern: `DualCLIPLoaderGGUF` + `SamplerCustomAdvanced` + `CFGGuider`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
| Component | Source | Size | Format |
|
| 22 |
+
|---|---|---|---|
|
| 23 |
+
| DiT | `vantagewithai/LTX2.3-10Eros-GGUF` (10Eros v1) | 10.4 GB | Q3_K_M GGUF |
|
| 24 |
+
| Text encoder | `mradermacher/gemma-3-12b-it-qat-abliterated-GGUF` | 5.6 GB | Q3_K_M GGUF |
|
| 25 |
+
| Text projection | `Kijai/LTX2.3_comfy` | 2.2 GB | bf16 safetensors |
|
| 26 |
+
| VAE | `Kijai/LTX2.3_comfy` (TaeL tiny VAE) | 22 MB | safetensors |
|
| 27 |
+
| LoRA | `TenStrip/LTX2.3_Distilled_Lora_1.1_Experiments` cond_safe | 662 MB | safetensors |
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
## Performance
|
| 30 |
|
| 31 |
+
First call downloads ~20 GB of models (cached on 50 GB ephemeral disk). Verified benchmark:
|
| 32 |
|
| 33 |
+
| Resolution | Duration | Steps | Time | Hardware |
|
| 34 |
+
|---|---|---|---|---|
|
| 35 |
+
| 512x320 | 2 sec | 8 (distilled-1.1) | **74 min** | free CPU (2 vCPU) |
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
## Curl test
|
| 38 |
|
| 39 |
```bash
|
| 40 |
TOKEN="hf_xxx"
|
| 41 |
+
SPACE="https://werecooking-ltx-2-3-cpu.hf.space"
|
| 42 |
|
| 43 |
EVT=$(curl -s -X POST "$SPACE/gradio_api/call/generate" \
|
| 44 |
-H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \
|
| 45 |
+
-d '{"data":["A woman walking through a neon-lit Tokyo alley", 2.0, 8, -1]}' \
|
| 46 |
| python -c "import sys,json;print(json.load(sys.stdin)['event_id'])")
|
| 47 |
curl -sN "$SPACE/gradio_api/call/generate/$EVT" -H "Authorization: Bearer $TOKEN"
|
| 48 |
```
|
| 49 |
|
| 50 |
+
## Logs
|
| 51 |
|
| 52 |
```bash
|
| 53 |
curl -N -H "Authorization: Bearer $TOKEN" "https://huggingface.co/api/spaces/WeReCooking/ltx-2.3-cpu/logs/build"
|
| 54 |
curl -N -H "Authorization: Bearer $TOKEN" "https://huggingface.co/api/spaces/WeReCooking/ltx-2.3-cpu/logs/run"
|
| 55 |
```
|
| 56 |
|
| 57 |
+
## Credits
|
| 58 |
+
|
| 59 |
+
- [Lightricks/LTX-2.3](https://huggingface.co/Lightricks/LTX-2.3) -- base model
|
| 60 |
+
- [Kijai/LTX2.3_comfy](https://huggingface.co/Kijai/LTX2.3_comfy) -- comfy weights + tiny VAE
|
| 61 |
+
- [city96/ComfyUI-GGUF](https://github.com/city96/ComfyUI-GGUF) -- GGUF loader with per-layer dequant
|
| 62 |
+
- [TenStrip](https://huggingface.co/TenStrip/LTX2.3_Distilled_Lora_1.1_Experiments) -- cond_safe distill LoRA
|
| 63 |
+
- [unsloth](https://huggingface.co/unsloth/LTX-2.3-GGUF) -- GGUF quantization
|
| 64 |
|
| 65 |
+
## License
|
|
|
|
|
|
|
| 66 |
|
| 67 |
+
Same as upstream [LTX-2 Community License](https://github.com/Lightricks/LTX-2/blob/main/LICENSE).
|
app.py
CHANGED
|
@@ -1,72 +1,879 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
a ZeroGPU fork would look like so users can clone and switch hardware in
|
| 6 |
-
one click.
|
| 7 |
"""
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from pathlib import Path
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
def health() -> str:
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
DEMO_VIDEOS = sorted(str(p) for p in Path("/app/assets/videos").glob("*.mp4"))
|
| 44 |
|
| 45 |
with gr.Blocks(title="LTX 2.3 CPU") as demo:
|
| 46 |
-
gr.Markdown(
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
with gr.Column(scale=1):
|
| 49 |
-
prompt_in = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
duration_in = gr.Slider(1.0, 4.0, value=2.0, step=0.5, label="Duration (s)")
|
| 52 |
-
steps_in = gr.Slider(4, 16, value=8, step=1, label="Steps
|
| 53 |
-
|
| 54 |
-
|
| 55 |
with gr.Column(scale=1):
|
| 56 |
-
gr.
|
| 57 |
-
gr.
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
gr.Button(visible=False).click(fn=health, outputs=[gr.Textbox(visible=False)], api_name="health")
|
| 70 |
|
| 71 |
demo.queue(default_concurrency_limit=1)
|
| 72 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LTX 2.3 CPU Space -- 10Eros + cond_safe distill LoRA via ComfyUI GGUF.

Path C: 10Eros fine-tune (Q3_K_M GGUF) + cond_safe distill 1.1 LoRA.
Abliterated Gemma-3-12B text encoder. Free HF CPU Space (18 GB RAM).
"""

# The docstring above must stay the first statement of the module; placed
# after an assignment it is only a discarded string expression and the module
# ends up with no __doc__.
ENABLE_AUDIO = False  # Set to True to show audio checkbox (adds ~4h on CPU)
|
| 7 |
+
import json
|
| 8 |
+
import os
|
| 9 |
+
import re
|
| 10 |
+
import shutil
|
| 11 |
+
import subprocess
|
| 12 |
+
import sys
|
| 13 |
+
import tempfile
|
| 14 |
+
import time
|
| 15 |
+
import uuid
|
| 16 |
from pathlib import Path
|
| 17 |
|
| 18 |
+
# Filesystem layout of the ComfyUI checkout this app drives.
COMFY = Path("/app/ComfyUI")
MODELS = COMFY / "models"
OUTPUT = COMFY / "output"

# One entry per model file: the Hub repo, the file inside that repo, the local
# destination under MODELS, and a human-readable label.  Rows below are
# (repo, file in repo, MODELS subfolder, label); every file keeps its own
# basename locally.
# NOTE(review): the workflow's audio VAE (LTX23_audio_vae_bf16.safetensors) is
# not listed here -- confirm it is always provisioned by other means.
DOWNLOAD_MANIFEST = [
    {
        "repo": repo,
        "file": remote_file,
        "dest": MODELS / subdir / Path(remote_file).name,
        "label": label,
    }
    for repo, remote_file, subdir, label in (
        (
            "vantagewithai/LTX2.3-10Eros-GGUF",
            "10Eros_v1-Q3_K_M.gguf",
            "diffusion_models",
            "10Eros DiT Q3_K_M (10.4 GB)",
        ),
        (
            "mradermacher/gemma-3-12b-it-qat-abliterated-GGUF",
            "gemma-3-12b-it-qat-abliterated.Q3_K_M.gguf",
            "text_encoders",
            "Gemma-3-12B abliterated Q3_K_M (5.6 GB)",
        ),
        (
            "Kijai/LTX2.3_comfy",
            "text_encoders/ltx-2.3_text_projection_bf16.safetensors",
            "text_encoders",
            "Text projection (2.2 GB)",
        ),
        (
            "Kijai/LTX2.3_comfy",
            "vae/taeltx2_3.safetensors",
            "vae",
            "Tiny VAE (22 MB)",
        ),
        (
            "Kijai/LTX2.3_comfy",
            "vae/LTX23_video_vae_bf16.safetensors",
            "vae",
            "Full video VAE (1.4 GB)",
        ),
        (
            "TenStrip/LTX2.3_Distilled_Lora_1.1_Experiments",
            "ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors",
            "loras",
            "cond_safe distill LoRA (662 MB)",
        ),
    )
]
|
| 60 |
|
| 61 |
+
def _wf_node(class_type, **inputs):
    """Build one workflow node: ``{"class_type": ..., "inputs": {...}}``."""
    return {"class_type": class_type, "inputs": inputs}


# Base node graph, keyed by node id.  An input written as ["<node id>", <n>]
# links to output slot <n> of another node.  "__PROMPT__", the frame counts,
# step count and seed are placeholder defaults.
WORKFLOW_TEMPLATE = {
    # --- model loading ---
    "1": _wf_node("UnetLoaderGGUF", unet_name="10Eros_v1-Q3_K_M.gguf"),
    "2": _wf_node(
        "DualCLIPLoaderGGUF",
        clip_name1="gemma-3-12b-it-qat-abliterated.Q3_K_M.gguf",
        clip_name2="ltx-2.3_text_projection_bf16.safetensors",
        type="ltxv",
    ),
    "3": _wf_node(
        "LoraLoaderModelOnly",
        model=["1", 0],
        lora_name="ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors",
        strength_model=0.6,
    ),
    # --- text conditioning ---
    "40": _wf_node("CLIPTextEncode", text="__PROMPT__", clip=["2", 0]),
    "41": _wf_node(
        "CLIPTextEncode",
        text="blurry, oversaturated, low resolution, distorted",
        clip=["2", 0],
    ),
    "4": _wf_node("LTXVConditioning", positive=["40", 0], negative=["41", 0], frame_rate=24),
    # --- latents (video, audio, merged) ---
    "5": _wf_node("EmptyLTXVLatentVideo", width=512, height=320, length=49, batch_size=1),
    "50": _wf_node(
        "LTXVEmptyLatentAudio",
        audio_vae=["53", 0],
        frame_rate=24,
        frames_number=49,
        batch_size=1,
    ),
    "51": _wf_node("LTXVConcatAVLatent", video_latent=["5", 0], audio_latent=["50", 0]),
    # --- sampling ---
    "7": _wf_node(
        "CFGGuider",
        model=["3", 0],
        positive=["4", 0],
        negative=["4", 1],
        cfg=1.0,
    ),
    "8": _wf_node(
        "LTXVScheduler",
        steps=8,
        max_shift=2.05,
        base_shift=0.95,
        stretch=True,
        terminal=0.1,
    ),
    "9": _wf_node("KSamplerSelect", sampler_name="euler_ancestral_cfg_pp"),
    "10": _wf_node("RandomNoise", noise_seed=42),
    "11": _wf_node(
        "SamplerCustomAdvanced",
        noise=["10", 0],
        guider=["7", 0],
        sampler=["9", 0],
        sigmas=["8", 0],
        latent_image=["51", 0],
    ),
    "52": _wf_node("LTXVSeparateAVLatent", av_latent=["11", 0]),
    # --- decoding and output ---
    "12": _wf_node("VAELoader", vae_name="taeltx2_3.safetensors"),
    "13": _wf_node("VAEDecode", samples=["52", 0], vae=["12", 0]),
    "53": _wf_node(
        "VAELoaderKJ",
        vae_name="LTX23_audio_vae_bf16.safetensors",
        device="main_device",
        dtype="bf16",
        weight_dtype="bf16",
    ),
    "54": _wf_node("LTXVAudioVAEDecode", audio_latent=["52", 1], vae=["53", 0]),
    "14": _wf_node(
        "SaveAnimatedWEBP",
        images=["13", 0],
        filename_prefix="ltx_output",
        fps=24.0,
        lossless=False,
        quality=80,
        method="default",
    ),
}
|
| 169 |
|
| 170 |
+
# Human-readable label for each workflow node id.
# NOTE(review): presumably shown as progress text while a node executes --
# confirm against the code that reads this mapping.
NODE_LABELS = {
    # core pipeline
    "1": "Loading DiT GGUF",
    "2": "Loading Gemma+Projection",
    "3": "Applying distill LoRA",
    "4": "Encoding text",
    "5": "Creating video latent",
    "7": "Building guider",
    "8": "Computing schedule",
    "9": "Selecting sampler",
    "10": "Generating noise",
    "11": "Diffusion",
    "12": "Loading VAE",
    "13": "Decoding video",
    "14": "Saving output",
    # audio branch
    "50": "Creating audio latent",
    "51": "Merging AV latents",
    "52": "Separating AV",
    "53": "Loading audio VAE",
    "54": "Decoding audio",
    # image-to-video nodes
    "20": "Loading image",
    "21": "Preprocessing image",
    "22": "I2V conditioning",
    # optional user LoRA
    "30": "Applying user LoRA",
    # prompt encoders
    "40": "Encoding prompt",
    "41": "Encoding negative",
}
|
| 196 |
|
| 197 |
+
|
| 198 |
+
def _download_models(progress_cb=None):
    """Make sure every file in DOWNLOAD_MANIFEST is present at its ``dest``.

    Entries whose destination already exists are skipped.  Missing files are
    fetched with ``hf_hub_download`` and exposed at ``dest`` as a symlink to
    the downloaded file, falling back to a full copy when symlinking fails.

    Args:
        progress_cb: Optional callable invoked as
            ``progress_cb(fraction, desc=...)`` before each download and once
            more with ``1.0`` when everything is in place.
    """
    # Imported lazily so a fully provisioned install never needs the import.
    hf_hub_download = None
    total = len(DOWNLOAD_MANIFEST)
    for index, entry in enumerate(DOWNLOAD_MANIFEST):
        dest = Path(entry["dest"])
        if dest.exists():
            continue
        label = entry["label"]
        if progress_cb:
            progress_cb(index / total, desc=f"Downloading {label} (cache miss)...")
        print(f"[download] {label} from {entry['repo']}/{entry['file']}", flush=True)
        if hf_hub_download is None:
            from huggingface_hub import hf_hub_download
        cached = hf_hub_download(repo_id=entry["repo"], filename=entry["file"])
        dest.parent.mkdir(parents=True, exist_ok=True)
        if dest.is_symlink():
            # exists() was False, so this link is dangling (its target is
            # gone).  Remove it; otherwise os.symlink fails and the copy
            # fallback would write through the dead link.
            dest.unlink()
        try:
            os.symlink(cached, str(dest))
        except OSError:
            # Symlinks unsupported or not permitted here: keep a real copy.
            shutil.copy2(cached, str(dest))
    if progress_cb:
        progress_cb(1.0, desc="Models ready")
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
_comfy_proc = None
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def _ensure_comfy():
    """Start the headless ComfyUI server if needed and wait until it answers.

    Returns immediately when the previously started process is still alive.
    Otherwise launches ``main.py`` from the ComfyUI checkout on
    127.0.0.1:8188 and polls ``/system_stats`` every two seconds, up to 120
    attempts.

    Raises:
        RuntimeError: If the process exits while starting up, or the server
            never answers within the polling window.
    """
    global _comfy_proc
    if _comfy_proc is not None and _comfy_proc.poll() is None:
        return

    import http.client
    import urllib.request

    print("[comfy] Starting ComfyUI headless (--cpu)...", flush=True)
    _comfy_proc = subprocess.Popen(
        [
            sys.executable, "-u", str(COMFY / "main.py"),
            "--cpu",
            "--listen", "127.0.0.1",
            "--port", "8188",
            "--dont-print-server",
            "--force-fp32",
            "--cache-none",
        ],
        cwd=str(COMFY),
        stdout=sys.stdout,
        stderr=sys.stderr,
    )
    for _ in range(120):
        time.sleep(2)
        exit_code = _comfy_proc.poll()
        if exit_code is not None:
            # The child already died (bad install, import error, port in
            # use, ...): fail now instead of polling a dead server.
            raise RuntimeError(f"ComfyUI exited during startup (exit code {exit_code})")
        try:
            # Context manager closes the HTTP response instead of leaking it.
            with urllib.request.urlopen("http://127.0.0.1:8188/system_stats", timeout=2):
                pass
        except (OSError, http.client.HTTPException):
            # Not listening yet (refused / timed out / bad reply): retry.
            continue
        print("[comfy] Server ready", flush=True)
        return
    raise RuntimeError("ComfyUI failed to start within 240s")
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _search_hf_loras(query: str) -> list[str]:
    """Return up to 15 Hub model ids matching *query*.

    An empty query, or one shorter than two characters, is replaced by the
    default search "ltx 2.3 lora".  Any failure yields an empty list.
    """
    search_term = query if query and len(query) >= 2 else "ltx 2.3 lora"
    try:
        from huggingface_hub import HfApi

        hits = list(HfApi().list_models(search=search_term, limit=15))
        model_ids = []
        for model in hits:
            if model.id:
                model_ids.append(model.id)
        return model_ids
    except Exception:
        return []
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def _resolve_lora_files(repo_id: str) -> list[str]:
    """List the LoRA weight files in a Hub repo.

    Only ``.safetensors`` files whose path contains "lora" (any case) are
    returned.  A missing id, an id without "/", or any failure while listing
    the repo yields an empty list.
    """
    if not (repo_id and "/" in repo_id):
        return []
    try:
        from huggingface_hub import HfApi

        matches = []
        for name in HfApi().list_repo_files(repo_id):
            if name.endswith(".safetensors") and "lora" in name.lower():
                matches.append(name)
        return matches
    except Exception:
        return []
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# Memo of earlier detection results, keyed by the LoRA path string.
_ic_lora_cache: dict[str, bool] = {}


def _is_ic_lora(lora_path: str) -> bool:
    """Cached wrapper around ``_detect_ic_lora``.

    A falsy path is never an IC-LoRA and is not cached.  Every other path is
    detected once and the result reused on later calls.
    """
    if not lora_path:
        return False
    if lora_path not in _ic_lora_cache:
        _ic_lora_cache[lora_path] = _detect_ic_lora(lora_path)
    return _ic_lora_cache[lora_path]
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def _detect_ic_lora(lora_path: str) -> bool:
    """Decide whether *lora_path* refers to an IC-LoRA.

    Checks, in order:
      1. the name contains "ic-lora" / "ic_lora" / "iclora" as its own token;
      2. a file of that name under ``MODELS / "loras"`` whose safetensors
         header passes ``_check_safetensors_header``;
      3. a path of the form ``owner/repo/file...``, downloaded from the Hub
         and checked the same way.

    Returns False when nothing matches or a probe fails.
    """
    # The lookbehind keeps ordinary names such as "realistic_lora" or
    # "cinematic-lora" from matching on their trailing "ic".
    if re.search(r"(?<![a-z0-9])ic[-_]?lora", lora_path, re.IGNORECASE):
        return True

    local = MODELS / "loras" / lora_path
    if local.exists():
        try:
            return _check_safetensors_header(str(local))
        except OSError as exc:
            print(f"[lora] Could not read header of {local}: {exc}", flush=True)
            return False

    parts = lora_path.split("/")
    if len(parts) >= 3:
        repo_id = "/".join(parts[:2])
        filename = "/".join(parts[2:])
        try:
            from huggingface_hub import hf_hub_download

            # NOTE: this fetches the whole file just to inspect its header.
            cached = hf_hub_download(repo_id=repo_id, filename=filename)
            return _check_safetensors_header(cached)
        except Exception as exc:
            # Best-effort probe: report the failure and treat it as "not IC".
            print(f"[lora] IC-LoRA probe failed for {lora_path}: {exc}", flush=True)
    return False
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def _check_safetensors_header(filepath: str) -> bool:
    """Report whether the file's header mentions ``reference_downscale_factor``.

    Reads an 8-byte little-endian length, then that many bytes of header
    text.  A declared length above 10,000,000 is rejected without reading
    further.  Undecodable bytes are ignored.
    """
    marker = "reference_downscale_factor"
    with open(filepath, "rb") as handle:
        declared_len = int.from_bytes(handle.read(8), "little")
        if declared_len > 10_000_000:
            return False
        raw_header = handle.read(declared_len)
    return marker in raw_header.decode("utf-8", errors="ignore")
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def _download_user_lora(repo_id: str, filename: str) -> str | None:
|
| 325 |
+
if not repo_id or not filename:
|
| 326 |
+
return None
|
| 327 |
+
from huggingface_hub import hf_hub_download
|
| 328 |
+
lora_dir = MODELS / "loras"
|
| 329 |
+
lora_dir.mkdir(parents=True, exist_ok=True)
|
| 330 |
+
local_name = f"{repo_id.replace('/', '_')}_{filename.replace('/', '_')}"
|
| 331 |
+
dest = lora_dir / local_name
|
| 332 |
+
if dest.exists():
|
| 333 |
+
return local_name
|
| 334 |
+
try:
|
| 335 |
+
token = os.environ.get("HF_TOKEN")
|
| 336 |
+
cached = hf_hub_download(repo_id=repo_id, filename=filename, token=token)
|
| 337 |
+
try:
|
| 338 |
+
os.symlink(cached, str(dest))
|
| 339 |
+
except OSError:
|
| 340 |
+
shutil.copy2(cached, str(dest))
|
| 341 |
+
return local_name
|
| 342 |
+
except Exception as e:
|
| 343 |
+
print(f"[lora] Failed to download {repo_id}/{filename}: {e}", flush=True)
|
| 344 |
+
return None
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def _build_workflow(prompt: str, steps: int, duration_sec: float, seed: int,
                    img_name: str | None = None, user_lora: str | None = None,
                    lora_strength: float = 0.6, vid_w: int | None = None,
                    vid_h: int | None = None, enable_audio: bool = True) -> dict:
    """Return a ComfyUI prompt graph derived from ``WORKFLOW_TEMPLATE``.

    A deep copy of the template is patched in place; the template itself is
    never modified.

    Args:
        prompt: Text written to node "40".
        steps: Step count written to node "8".
        duration_sec: Clip length in seconds; converted to a frame count at
            24 frames per second plus one, with a minimum of 9.
        seed: Noise seed written to node "10".
        img_name: Name of an image for a ``LoadImage`` node. When given, an
            image-conditioning chain is added.
        user_lora: LoRA file name. When given, a loader node "30" is inserted
            between node "3" and node "7"; ``_is_ic_lora`` selects the loader
            class.
        lora_strength: ``strength_model`` for the LoRA loader.
        vid_w: Output width; applied only together with ``vid_h``.
        vid_h: Output height; applied only together with ``vid_w``.
        enable_audio: When false, nodes "49" to "54" are removed and nodes
            "11" and "13" are rewired around them.

    Returns:
        The patched workflow dictionary.
    """
    wf = json.loads(json.dumps(WORKFLOW_TEMPLATE))  # cheap deep copy
    wf["40"]["inputs"]["text"] = prompt
    frames = max(9, int(duration_sec * 24) + 1)
    wf["5"]["inputs"]["length"] = frames
    if not enable_audio:
        for n in ("49", "50", "51", "52", "53", "54"):
            wf.pop(n, None)
        wf["11"]["inputs"]["latent_image"] = ["5", 0]
        wf["13"]["inputs"]["samples"] = ["11", 0]
    else:
        wf["50"]["inputs"]["frames_number"] = frames
    if vid_w and vid_h:
        wf["5"]["inputs"]["width"] = vid_w
        wf["5"]["inputs"]["height"] = vid_h
    wf["8"]["inputs"]["steps"] = steps
    wf["10"]["inputs"]["noise_seed"] = seed

    is_ic = _is_ic_lora(user_lora) if user_lora else False

    if user_lora:
        # Both LoRA kinds use the same wiring; only the loader class differs.
        wf["30"] = {
            "class_type": "LTXICLoRALoaderModelOnly" if is_ic else "LoraLoaderModelOnly",
            "inputs": {
                "model": ["3", 0],
                "lora_name": user_lora,
                "strength_model": lora_strength,
            },
        }
        wf["7"]["inputs"]["model"] = ["30", 0]

    if img_name:
        wf["20"] = {
            "class_type": "LoadImage",
            "inputs": {"image": img_name},
        }
        wf["12"]["inputs"]["vae_name"] = "LTX23_video_vae_bf16.safetensors"
        # Resize the input image to the latent's width/height.
        wf["25"] = {
            "class_type": "ImageScale",
            "inputs": {
                "image": ["20", 0],
                "upscale_method": "lanczos",
                "width": wf["5"]["inputs"]["width"],
                "height": wf["5"]["inputs"]["height"],
                "crop": "center",
            },
        }

        if is_ic:
            wf["31"] = {
                "class_type": "LTXAddVideoICLoRAGuide",
                "inputs": {
                    "positive": ["4", 0],
                    "negative": ["4", 1],
                    "vae": ["12", 0],
                    "latent": ["5", 0],
                    "image": ["25", 0],
                    "frame_idx": 0,
                    "strength": 1.0,
                    # Second output of the IC-LoRA loader (node "30").
                    "latent_downscale_factor": ["30", 1],
                    "crop": "disabled",
                    "use_tiled_encode": False,
                    "tile_size": 512,
                    "tile_overlap": 64,
                },
            }
            wf["7"]["inputs"]["positive"] = ["31", 0]
            wf["7"]["inputs"]["negative"] = ["31", 1]
            latent_source = ["31", 2]
        else:
            wf["21"] = {
                "class_type": "LTXVPreprocess",
                "inputs": {"image": ["25", 0], "img_compression": 18},
            }
            wf["22"] = {
                "class_type": "LTXVImgToVideoInplace",
                "inputs": {
                    "latent": ["5", 0],
                    "vae": ["12", 0],
                    "image": ["21", 0],
                    "strength": 0.7,
                    "bypass": False,
                    "use_slerp": False,
                },
            }
            latent_source = ["22", 0]

        # Route the image-conditioned latent identically for both paths:
        # through node "51" while the audio branch is present, otherwise
        # straight into node "11". The IC path used to write node "11"
        # unconditionally, which skipped node "51" when audio was enabled.
        # NOTE(review): assumes node "51" feeds node "11" in WORKFLOW_TEMPLATE
        # - confirm against the template.
        if "51" in wf:
            wf["51"]["inputs"]["video_latent"] = latent_source
        else:
            wf["11"]["inputs"]["latent_image"] = latent_source

    return wf
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
def _submit_and_poll(workflow: dict, status_cb=None,
                     timeout: int = 21600) -> tuple[str | None, str | None] | None:
    """Queue *workflow* on the local ComfyUI server and wait for it to finish.

    Progress is followed over the server websocket; once execution ends the
    ``/history`` endpoint is queried to locate the produced files under
    ``OUTPUT``.

    Args:
        workflow: ComfyUI prompt graph to submit.
        status_cb: Optional callable receiving a one-line status string each
            time the state changes.
        timeout: Upper bound in seconds for the wait, also used as the
            websocket receive timeout.

    Returns:
        ``None`` if the server reported an ``execution_error``; otherwise a
        ``(video_path, audio_path)`` tuple in which either element is ``None``
        when no matching existing file was found in the history.

    Raises:
        Exception: Errors from opening the websocket or from the HTTP submit
            request propagate to the caller.
    """
    import urllib.request
    import websocket

    client_id = str(uuid.uuid4())

    # Connect BEFORE queueing the prompt so that no message can be missed.
    # Connecting afterwards left a window in which a fast run could finish
    # unseen, leaving recv() blocked for the whole timeout.
    ws = websocket.WebSocket()
    ws.settimeout(timeout)
    ws.connect(f"ws://127.0.0.1:8188/ws?clientId={client_id}")

    current_step = 0
    max_steps = 0
    current_label = "Queued"

    def _status_line():
        elapsed = int(time.time() - t0)
        m, s = divmod(elapsed, 60)
        if max_steps > 0:
            return f"[{current_step}/{max_steps}] {m}m{s:02d}s: {current_label}"
        return f"{m}m{s:02d}s: {current_label}"

    def _notify():
        if status_cb:
            status_cb(_status_line())

    try:
        payload = json.dumps({"prompt": workflow, "client_id": client_id}).encode()
        req = urllib.request.Request(
            "http://127.0.0.1:8188/prompt",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=30) as resp:
            resp_data = json.loads(resp.read())
        pid = resp_data.get("prompt_id", client_id)

        t0 = time.time()

        while time.time() - t0 < timeout:
            try:
                raw = ws.recv()
            except (websocket.WebSocketException, OSError):
                # Receive timeout or dropped connection. Retrying a dead
                # socket would spin until the deadline, so stop waiting and
                # let the history lookup decide the outcome.
                break
            if not raw or isinstance(raw, bytes):
                # Empty or binary frames carry no JSON status.
                continue
            try:
                msg = json.loads(raw)
            except ValueError:
                continue
            if not isinstance(msg, dict):
                continue

            msg_type = msg.get("type", "")
            data = msg.get("data", {})

            if msg_type == "executing":
                node_id = data.get("node")
                if node_id is None:
                    # A null node marks the end of execution.
                    current_label = "Complete"
                    _notify()
                    break
                current_label = NODE_LABELS.get(str(node_id), f"Node {node_id}")
                _notify()

            elif msg_type == "progress":
                current_step = data.get("value", 0)
                max_steps = data.get("max", 0)
                node_id = str(data.get("node", "11"))
                current_label = f"{NODE_LABELS.get(node_id, 'Step')} {current_step}/{max_steps}"
                _notify()

            elif msg_type == "execution_error":
                err = str(data.get("exception_message", "Unknown error"))
                current_label = f"Error: {err[:100]}"
                _notify()
                return None
    finally:
        try:
            ws.close()
        except Exception:
            pass

    video_path = None
    audio_path = None
    try:
        with urllib.request.urlopen(f"http://127.0.0.1:8188/history/{pid}", timeout=10) as hist:
            hdata = json.loads(hist.read())
        outputs = hdata[pid].get("outputs", {}) if pid in hdata else {}
        for out in outputs.values():
            # The first existing file wins for each of video and audio.
            for key in ("images", "gifs"):
                for item in out.get(key, []):
                    fpath = OUTPUT / item.get("subfolder", "") / item["filename"]
                    if fpath.exists() and not video_path:
                        video_path = str(fpath)
            for item in out.get("audio", []):
                fpath = OUTPUT / item.get("subfolder", "") / item["filename"]
                if fpath.exists() and not audio_path:
                    audio_path = str(fpath)
    except Exception as e:
        # Best effort: the caller treats a missing video path as failure.
        print(f"[poll] Failed to read history for {pid}: {e}", flush=True)
    return video_path, audio_path
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
def generate(prompt, duration_sec, steps, seed, image_path=None,
             user_lora_file=None, lora_strength=0.6, enable_audio=False, progress=None):
    """Run one generation through ComfyUI and return a playable file.

    Args:
        prompt: Text prompt; must contain non-whitespace characters.
        duration_sec: Clip duration in seconds.
        steps: Number of sampling steps.
        seed: Noise seed.
        image_path: Optional path of a first-frame image. When given, it is
            copied into the ComfyUI input folder as PNG and the output size is
            derived from it (longest side scaled to 512, both sides rounded
            down to a multiple of 32, minimum 64).
        user_lora_file: Optional LoRA file name passed to the workflow.
        lora_strength: LoRA strength passed to the workflow.
        enable_audio: Whether the workflow keeps its audio branch.
        progress: Optional progress callable ``progress(fraction, desc=...)``.

    Returns:
        ``(output_path, status_text)``. The output is an mp4 when conversion
        succeeds, otherwise a copy of the file ComfyUI produced.

    Raises:
        gr.Error: If the prompt is empty or the generation produced no video.
    """
    import gradio as gr
    # Also reject None, which has no .strip().
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt cannot be empty")

    status_lines = ["Initializing..."]

    if progress:
        progress(0.0, desc="Checking models...")
    _download_models(progress)

    if progress:
        progress(0.15, desc="Starting ComfyUI...")
    _ensure_comfy()

    img_name = None
    img_w, img_h = None, None
    if image_path:
        comfy_input = COMFY / "input"
        comfy_input.mkdir(parents=True, exist_ok=True)
        img_name = f"input_{uuid.uuid4().hex[:8]}.png"
        from PIL import Image as PILImage
        # Context manager releases the file handle once the copy is written.
        with PILImage.open(image_path) as pil_img:
            pil_img.save(str(comfy_input / img_name))
            w, h = pil_img.size
        scale = 512 / max(w, h)
        img_w = max(int(w * scale) // 32 * 32, 64)
        img_h = max(int(h * scale) // 32 * 32, 64)

    mode = "I2V" if img_name else "T2V"
    if progress:
        progress(0.2, desc=f"{mode}: {steps} steps, {duration_sec}s clip...")

    def _on_status(line):
        status_lines[0] = line
        print(f"[status] {line}", flush=True)

    wf = _build_workflow(
        prompt, int(steps), float(duration_sec), int(seed),
        img_name=img_name, user_lora=user_lora_file,
        lora_strength=float(lora_strength),
        vid_w=img_w, vid_h=img_h,
        enable_audio=enable_audio,
    )
    poll_result = _submit_and_poll(wf, status_cb=_on_status)
    if poll_result is None:
        raise gr.Error(f"Generation failed: {status_lines[0]}")
    result_video, result_audio = poll_result

    if result_video is None:
        raise gr.Error(f"Generation failed: {status_lines[0]}")
    result = result_video

    out_dir = Path(tempfile.mkdtemp())
    out_path = out_dir / "output.mp4"
    try:
        from PIL import Image as PILImage
        import cv2
        import numpy as np

        # Read every frame of the (animated) result image.
        frames = []
        with PILImage.open(result) as img:
            try:
                while True:
                    frames.append(np.array(img.convert("RGB")))
                    img.seek(img.tell() + 1)
            except EOFError:
                pass

        if frames:
            h, w = frames[0].shape[:2]
            # Pad odd dimensions up to even ones.
            w2, h2 = w + (w % 2), h + (h % 2)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(out_path), fourcc, 24, (w2, h2))
            for f in frames:
                bgr = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
                if bgr.shape[1] != w2 or bgr.shape[0] != h2:
                    bgr = cv2.copyMakeBorder(bgr, 0, h2 - h, 0, w2 - w, cv2.BORDER_CONSTANT)
                writer.write(bgr)
            writer.release()

            # Re-encode to H.264; keep the mp4v file if ffmpeg fails.
            h264_path = out_dir / "output_h264.mp4"
            rc = subprocess.run(
                ["ffmpeg", "-y", "-i", str(out_path), "-c:v", "libx264",
                 "-pix_fmt", "yuv420p", "-r", "24", str(h264_path)],
                capture_output=True, timeout=120,
            )
            if rc.returncode == 0 and h264_path.exists():
                out_path.unlink()
                h264_path.rename(out_path)
            print(f"[output] Converted {len(frames)} frames to mp4 (h264: {'ok' if rc.returncode == 0 else 'fallback mp4v'})", flush=True)

            if result_audio and Path(result_audio).exists():
                av_path = out_dir / "output_av.mp4"
                av_rc = subprocess.run(
                    ["ffmpeg", "-y", "-i", str(out_path), "-i", result_audio,
                     "-c:v", "copy", "-c:a", "aac", "-shortest", str(av_path)],
                    capture_output=True, timeout=120,
                )
                if av_rc.returncode == 0 and av_path.exists():
                    out_path.unlink()
                    av_path.rename(out_path)
                    print("[output] Merged audio into mp4", flush=True)

        # Never hand back a path that was not actually produced.
        if not out_path.exists() or out_path.stat().st_size == 0:
            raise RuntimeError("no mp4 was written")
    except Exception as e:
        # Fall back to the file ComfyUI produced, keeping its real extension
        # so the player is not handed a mislabelled container.
        suffix = Path(result).suffix or ".webp"
        print(f"[output] mp4 conversion failed: {e}, returning {suffix.lstrip('.')}", flush=True)
        out_path = out_dir / f"output{suffix}"
        shutil.copy2(result, out_path)

    elapsed = status_lines[0].split(":")[0] if ":" in status_lines[0] else "?"
    lora_info = f" | LoRA: {user_lora_file}" if user_lora_file else ""
    return str(out_path), f"Done {elapsed} | {mode} | {steps} steps | {duration_sec}s | seed {int(seed)}{lora_info}"
|
| 679 |
|
| 680 |
|
| 681 |
def health() -> str:
    """Return a one-line summary of RAM usage and the ComfyUI process state.

    RAM figures are whole gibibytes from ``psutil.virtual_memory()``. ComfyUI
    counts as running when ``_comfy_proc`` is set and ``poll()`` returns
    ``None``.
    """
    import psutil

    gib = 1024**3
    mem = psutil.virtual_memory()
    comfy_state = "running" if _comfy_proc and _comfy_proc.poll() is None else "stopped"
    return f"LTX 2.3 CPU Space | RAM {mem.used // gib}/{mem.total // gib} GB | ComfyUI {comfy_state}"
|
| 689 |
+
|
| 690 |
+
|
| 691 |
+
import gradio as gr
|
| 692 |
+
import random
|
| 693 |
|
| 694 |
+
# Accumulated LoRA dropdown choices; appended to by `_init_loras` and
# `_on_lora_pick` and handed back to the dropdown as its `choices`.
_all_lora_choices: list[str] = []

# NOTE(review): not referenced by any function visible in this part of the
# file - confirm whether it is still needed.
_lora_state: dict[str, str] = {"mode": "search"}
|
| 697 |
+
|
| 698 |
+
|
| 699 |
+
def _on_lora_interact(value):
    """Translate the text typed into the LoRA field into a dropdown update.

    * Empty or single-character input: choices from the default Hub search.
    * Input ending in ``.safetensors``: accepted as the value unchanged.
    * Input containing ``/``: the first two segments are taken as a repo id
      and its ``.safetensors`` files are offered; a sole file is preselected.
    * Anything else: used as a Hub search query.

    Returns:
        A ``gr.update(...)`` payload for the dropdown.
    """
    if not value or len(value) < 2:
        return gr.update(choices=_search_hf_loras("ltx 2.3 lora"), value=None)

    if value.endswith(".safetensors"):
        return gr.update(value=value)

    if "/" not in value:
        return gr.update(choices=_search_hf_loras(value), value=None)

    # A "/" guarantees at least two segments: owner and repository name.
    owner, repo_name = value.split("/")[:2]
    repo_id = f"{owner}/{repo_name}"

    found = _resolve_lora_files(repo_id)
    if not found:
        # The helper returned nothing: ask the Hub API directly.
        try:
            from huggingface_hub import HfApi
            found = [f for f in HfApi().list_repo_files(repo_id) if f.endswith(".safetensors")]
        except Exception:
            found = []

    options = [f"{repo_id}/{f}" for f in found]
    preselected = options[0] if len(options) == 1 else None
    return gr.update(choices=options, value=preselected)
|
| 725 |
+
|
| 726 |
+
|
| 727 |
+
def _prepare_user_lora(lora_path, progress=None):
|
| 728 |
+
if not lora_path or "/" not in lora_path:
|
| 729 |
+
return None
|
| 730 |
+
lora_path = re.sub(r"^https?://huggingface\.co/", "", lora_path)
|
| 731 |
+
lora_path = re.sub(r"/blob/main/", "/", lora_path)
|
| 732 |
+
lora_path = re.sub(r"/resolve/main/", "/", lora_path)
|
| 733 |
+
parts = lora_path.split("/")
|
| 734 |
+
if len(parts) < 3:
|
| 735 |
+
return None
|
| 736 |
+
repo_id = f"{parts[0]}/{parts[1]}"
|
| 737 |
+
filename = "/".join(parts[2:])
|
| 738 |
+
if progress:
|
| 739 |
+
progress(0.1, desc=f"Downloading LoRA from {repo_id}...")
|
| 740 |
+
return _download_user_lora(repo_id, filename)
|
| 741 |
|
|
|
|
| 742 |
|
| 743 |
# Gradio UI: input controls on the left, output on the right, plus the
# callbacks that drive the LoRA picker and start a generation.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened listing - confirm it against the intended layout.
with gr.Blocks(title="LTX 2.3 CPU") as demo:
    gr.Markdown(
        "**[LTX 2.3](https://huggingface.co/Lightricks/LTX-2.3) CPU** 2s clip takes ~74 min (up to 321m w/ LoRA + I2V), `cond_safe` distill 1.1 + Sulphur-2 merge = [10Eros](https://huggingface.co/TenStrip/LTX2.3-10Eros). *4experimental~2be kinda patient..*"
    )
    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            prompt_in = gr.Textbox(
                label="Prompt", lines=3,
                placeholder="A woman walking through a neon-lit Tokyo alley at night, cinematic",
            )
            image_in = gr.Image(label="First frame (optional, I2V)", type="filepath", height=180)
            with gr.Accordion("LoRA (optional, up to 9)", open=False):
                lora_picker = gr.Dropdown(
                    label="LoRA (select to add, click X to remove)",
                    info="Type to search HF, paste URL or user/repo/lora.safetensors",
                    choices=[],
                    value=[],
                    multiselect=True,
                    allow_custom_value=True,
                    interactive=True,
                )
                lora_strength = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="LoRA strength (all)")
            with gr.Row():
                audio_in = gr.Checkbox(
                    label="Enable audio (+4h, duplicate & edit L1 app.py)",
                    value=False, interactive=ENABLE_AUDIO
                )
                duration_in = gr.Slider(1.0, 4.0, value=2.0, step=0.5, label="Duration (s)")
                steps_in = gr.Slider(4, 16, value=8, step=1, label="Steps")
                seed_in = gr.Number(label="Seed", value=-1, precision=0)
            run_btn = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=1):
            video_out = gr.Video(label="Output", height=300)
            status_out = gr.Textbox(label="Status", interactive=False)

    def _list_repo_safetensors(repo):
        """Return the ``.safetensors`` paths in Hub repo *repo*; may raise."""
        from huggingface_hub import HfApi
        return [f for f in HfApi().list_repo_files(repo) if f.endswith(".safetensors")]

    def _on_lora_pick(selected_values):
        """Handle a change of the LoRA dropdown.

        Entries containing ``/`` are kept as selections (at most 9). Entries
        without ``/`` are joined into a Hub search query whose results are
        added to the shared choice list.
        """
        global _all_lora_choices
        selected = list(selected_values) if selected_values else []
        print(f"[lora] pick: {selected}", flush=True)

        valid = [v for v in selected if "/" in v]
        search_terms = [v for v in selected if "/" not in v and v.strip()]

        if search_terms:
            query = " ".join(search_terms)
            repos = _search_hf_loras(query)
            resolved = []
            for repo in repos[:8]:
                try:
                    resolved.extend(f"{repo}/{f}" for f in _list_repo_safetensors(repo))
                except Exception:
                    # Listing failed: offer the bare repo id instead.
                    resolved.append(repo)
            for r in resolved:
                if r not in _all_lora_choices:
                    _all_lora_choices.append(r)
            print(f"[lora] search '{query}': {len(resolved)} new, {len(_all_lora_choices)} total", flush=True)
            return gr.update(choices=_all_lora_choices, value=valid[:9])

        return gr.update(choices=_all_lora_choices, value=valid[:9])

    _POPULAR_LORAS = [
        "Phr00t/LTX2-Rapid-Merges/LORAs/povnsfw-v3-complete.safetensors",
        "Phr00t/LTX2-Rapid-Merges/LORAs/phr00t-povnsfw-v1.safetensors",
    ]

    def _init_loras():
        """Seed the dropdown with the preset entries plus a default Hub search."""
        global _all_lora_choices
        for p in _POPULAR_LORAS:
            if p not in _all_lora_choices:
                _all_lora_choices.append(p)
        repos = _search_hf_loras("ltx 2.3 lora")
        for repo in repos[:12]:
            try:
                found = [f"{repo}/{f}" for f in _list_repo_safetensors(repo)]
            except Exception:
                # Listing failed: offer the bare repo id instead.
                found = [repo]
            for path in found:
                if path not in _all_lora_choices:
                    _all_lora_choices.append(path)
        print(f"[lora] init: {len(repos)} repos -> {len(_all_lora_choices)} files", flush=True)
        return gr.update(choices=_all_lora_choices)

    lora_picker.input(fn=_on_lora_pick, inputs=[lora_picker], outputs=[lora_picker])
    demo.load(fn=_init_loras, outputs=[lora_picker])

    def _resolve_lora_entry(entry):
        """Turn a dropdown entry into ``user/repo/file.safetensors`` or ``None``.

        Entries already ending in ``.safetensors`` are returned unchanged; a
        bare ``user/repo`` resolves to the first ``.safetensors`` file listed
        for that repository.
        """
        if entry.endswith(".safetensors"):
            return entry
        if "/" in entry:
            repo_id = "/".join(entry.split("/")[:2])
            try:
                files = _list_repo_safetensors(repo_id)
            except Exception:
                files = []
            if files:
                return f"{repo_id}/{files[0]}"
        return None

    def _gen(prompt, image, lora_list, lora_str, enable_audio, dur, steps, seed, progress=gr.Progress()):
        """Click handler: resolve the seed and LoRA, then call ``generate``."""
        # A cleared Number field yields None; treat it like a negative seed.
        if seed is None or seed < 0:
            seed = random.randint(0, 2**31)

        # generate() accepts a single LoRA, so stop at the first one that
        # downloads successfully instead of fetching files never applied.
        first_lora = None
        for lp in (lora_list or [])[:9]:
            resolved = _resolve_lora_entry(lp) if lp else None
            if not resolved:
                continue
            first_lora = _prepare_user_lora(resolved, progress)
            if first_lora:
                break

        return generate(prompt, dur, steps, seed, image_path=image,
                        user_lora_file=first_lora, lora_strength=lora_str,
                        enable_audio=bool(enable_audio), progress=progress)

    run_btn.click(
        fn=_gen,
        inputs=[prompt_in, image_in, lora_picker, lora_strength, audio_in, duration_in, steps_in, seed_in],
        outputs=[video_out, status_out],
        api_name="generate",
    )
    # Hidden button whose only purpose is to expose `health` as an API route.
    gr.Button(visible=False).click(fn=health, outputs=[gr.Textbox(visible=False)], api_name="health")
|
| 875 |
|
| 876 |
# Enable the request queue with a default concurrency limit of 1.
demo.queue(default_concurrency_limit=1)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860, the port the Dockerfile exposes.
    demo.launch(server_name="0.0.0.0", server_port=7860, theme="Taithrah/Minimal")
|
assets/videos/.gitkeep
ADDED
|
File without changes
|