Update app.py
app.py
CHANGED
@@ -267,18 +267,78 @@ checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-
 spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
 gemma_root = snapshot_download(repo_id=GEMMA_REPO)
 
+# ---- Insert block (LoRA downloads) between lines 268 and 269 ----
+# LoRA repo + download the requested LoRA adapters
+LORA_REPO = "dagloop5/LoRA"
+
+print("=" * 80)
+print("Downloading LoRA adapters from dagloop5/LoRA...")
+print("=" * 80)
+pose_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="pose_enhancer.safetensors")
+general_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="general_enhancer.safetensors")
+motion_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="motion_helper.safetensors")
+
+print(f"Pose LoRA: {pose_lora_path}")
+print(f"General LoRA: {general_lora_path}")
+print(f"Motion LoRA: {motion_lora_path}")
+# ----------------------------------------------------------------
+
 print(f"Checkpoint: {checkpoint_path}")
 print(f"Spatial upsampler: {spatial_upsampler_path}")
 print(f"Gemma root: {gemma_root}")
 
 # Initialize pipeline WITH text encoder and optional audio support
+# ---- Replace block (pipeline init) lines 275-281 ----
 pipeline = LTX23DistilledA2VPipeline(
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
-
-
+    # initial LoRAs (strengths set to zero by default; will be adjusted at runtime)
+    loras=[
+        (pose_lora_path, 0.0),
+        (general_lora_path, 0.0),
+        (motion_lora_path, 0.0),
+    ],
+    quantization=QuantizationPolicy.fp8_cast(),  # keep FP8 quantization unchanged
 )
+# ----------------------------------------------------------------
+
+# ---- Insert block: helper to apply LoRA strengths at runtime (between lines 281 and 283) ----
+def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motion_strength: float):
+    """
+    Best-effort attempt to set LoRA strengths on the existing pipeline without creating a new pipeline object.
+    Tries common APIs that model_ledger / pipeline implementations may provide; falls back to setting
+    pipeline.loras (for lazy fusion) and logs a note.
+    """
+    # Build the spec list (keep ordering consistent with the pipeline loras list created above)
+    lora_specs = [
+        (pose_lora_path, float(pose_strength)),
+        (general_lora_path, float(general_strength)),
+        (motion_lora_path, float(motion_strength)),
+    ]
+    try:
+        # Preferred: model_ledger exposes an apply_loras API that fuses adapters in-place.
+        if hasattr(pipeline, "model_ledger") and hasattr(pipeline.model_ledger, "apply_loras"):
+            print("[LoRA] Applying LoRAs via pipeline.model_ledger.apply_loras(...)")
+            pipeline.model_ledger.apply_loras(lora_specs)
+            return
+
+        # Alternative: pipeline may have helper 'set_loras' or similar
+        if hasattr(pipeline, "set_loras"):
+            print("[LoRA] Applying LoRAs via pipeline.set_loras(...)")
+            pipeline.set_loras(lora_specs)
+            return
+
+        # Fallback: set an attribute that some pipeline implementations inspect lazily.
+        print("[LoRA] Setting pipeline.loras attribute (fallback). If pipeline supports lazy fusion, new strengths will be used.")
+        pipeline.loras = lora_specs
+        return
+
+    except Exception as e:
+        # Non-fatal: advise user and keep running with whatever pipeline state exists.
+        print(f"[LoRA] Warning: failed to apply LoRAs at runtime: {type(e).__name__}: {e}")
+        print("[LoRA] If you see this, the pipeline may require a restart or an explicit fusion API to apply LoRAs.")
+# ----------------------------------------------------------------
 
 # Preload all models for ZeroGPU tensor packing.
 print("Preloading all models (including Gemma and audio components)...")
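
Note: the apply_loras_to_pipeline helper above probes for an adapter API in a fixed order. A minimal standalone sketch of that probe order, using hypothetical stand-in classes (DummyLedger and DummyPipeline are illustrations, not part of app.py):

class DummyLedger:
    def apply_loras(self, specs):
        print("fused in-place:", specs)

class DummyPipeline:
    # Stand-in for the real pipeline object; only the attributes probed
    # by the helper matter here.
    model_ledger = DummyLedger()

pipe = DummyPipeline()
specs = [("pose_enhancer.safetensors", 0.5)]

# Same cascade as the helper: model_ledger.apply_loras first,
# then set_loras, then a plain attribute assignment as a last resort.
if hasattr(pipe, "model_ledger") and hasattr(pipe.model_ledger, "apply_loras"):
    pipe.model_ledger.apply_loras(specs)
elif hasattr(pipe, "set_loras"):
    pipe.set_loras(specs)
else:
    pipe.loras = specs  # lazy fusion, if the pipeline supports it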
@@ -347,7 +407,7 @@ def on_highres_toggle(first_image, last_image, high_res):
     return gr.update(value=w), gr.update(value=h)
 
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=80)
 @torch.inference_mode()
 def generate_video(
     first_image,
@@ -360,6 +420,9 @@ def generate_video(
     randomize_seed: bool = True,
     height: int = 1024,
     width: int = 1536,
+    pose_strength: float = 0.0,
+    general_strength: float = 0.0,
+    motion_strength: float = 0.0,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
@@ -399,6 +462,8 @@ def generate_video(
 
         log_memory("before pipeline call")
 
+        apply_loras_to_pipeline(pose_strength, general_strength, motion_strength)
+
         video, audio = pipeline(
             prompt=prompt,
             seed=current_seed,
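
Since apply_loras_to_pipeline now runs before every pipeline call, identical strengths get re-applied on each generation. A possible refinement, shown only as a sketch (the _last_specs cache is introduced here and is not in app.py), is to skip fusion when the strengths have not changed:

_last_specs = None

def apply_loras_cached(pose_strength, general_strength, motion_strength):
    # Wrapper around apply_loras_to_pipeline that avoids redundant fusion
    # work when the slider values are unchanged between generations.
    global _last_specs
    specs = (float(pose_strength), float(general_strength), float(motion_strength))
    if specs == _last_specs:
        return  # unchanged strengths: nothing to re-apply
    apply_loras_to_pipeline(*specs)
    _last_specs = specs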
@@ -464,9 +529,23 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
             with gr.Row():
                 enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=False)
                 high_res = gr.Checkbox(label="High Resolution", value=True)
+            with gr.Column():
+                gr.Markdown("### LoRA adapter strengths (set to 0 to disable)")
+                pose_strength = gr.Slider(
+                    label="Pose Enhancer strength",
+                    minimum=0.0, maximum=2.0, value=0.0, step=0.01
+                )
+                general_strength = gr.Slider(
+                    label="General Enhancer strength",
+                    minimum=0.0, maximum=2.0, value=0.0, step=0.01
+                )
+                motion_strength = gr.Slider(
+                    label="Motion Helper strength",
+                    minimum=0.0, maximum=2.0, value=0.0, step=0.01
+                )
 
         with gr.Column():
-            output_video = gr.Video(label="Generated Video", autoplay=
+            output_video = gr.Video(label="Generated Video", autoplay=False)
 
         gr.Examples(
             examples=[
@@ -486,11 +565,15 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
                 True,
                 1024,
                 1024,
+                0.0,  # pose_strength (example)
+                0.0,  # general_strength (example)
+                0.0,  # motion_strength (example)
             ],
         ],
         inputs=[
             first_image, last_image, input_audio, prompt, duration,
             enhance_prompt, seed, randomize_seed, height, width,
+            pose_strength, general_strength, motion_strength,
         ],
     )
 
@@ -517,6 +600,7 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
         inputs=[
             first_image, last_image, input_audio, prompt, duration, enhance_prompt,
             seed, randomize_seed, height, width,
+            pose_strength, general_strength, motion_strength,
         ],
         outputs=[output_video, seed],
     )
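
For reference, the Gradio inputs list maps positionally onto generate_video's signature, so the three new sliders arrive as the pose_strength, general_strength, and motion_strength keyword arguments. An illustrative direct call under that assumption (the image, audio, and prompt values are placeholders, and the parameter names before randomize_seed are inferred from the inputs ordering rather than confirmed from the full source):

video_path, used_seed = generate_video(
    first_image="first_frame.png",  # placeholder path
    last_image=None,
    input_audio=None,               # inferred parameter name
    prompt="a dancer spinning in slow motion",
    duration=5,                     # inferred parameter name
    enhance_prompt=False,           # inferred parameter name
    seed=0,
    randomize_seed=True,
    height=1024,
    width=1536,
    pose_strength=0.8,              # new: fuse pose_enhancer at 0.8
    general_strength=0.0,           # 0.0 leaves the adapter disabled
    motion_strength=0.4,
)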