dagloop5 committed on
Commit
428e251
·
verified ·
1 Parent(s): d27af4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -138
app.py CHANGED
@@ -272,6 +272,10 @@ LORA_CACHE_DIR = Path("lora_cache")
272
  LORA_CACHE_DIR.mkdir(exist_ok=True)
273
  current_lora_key: str | None = None
274
 
 
 
 
 
275
  checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-distilled.safetensors")
276
  spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
277
  gemma_root = snapshot_download(repo_id=GEMMA_REPO)
@@ -307,91 +311,76 @@ pipeline = LTX23DistilledA2VPipeline(
307
  )
308
  # ----------------------------------------------------------------
309
 
310
- def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motion_strength: float):
311
- """
312
- Apply LoRAs using cached fused state_dicts when available, otherwise
313
- build fused transformer on CPU, save its state_dict to cache, and then
314
- copy parameters in-place into the GPU-resident transformer.
315
-
316
- Caching key:
317
- sha256( f"{pose_path}:{round(pose,2)}|{general_path}:{round(general,2)}|{motion_path}:{round(motion,2)}" )
318
-
319
- Rounding to 2 decimals reduces unique keys and increases cache hits.
320
- """
321
- ledger = pipeline.model_ledger
322
- global current_lora_key, LORA_CACHE_DIR
323
-
324
- # Round strengths to 2 decimals for cache stability
325
  rp = round(float(pose_strength), 2)
326
  rg = round(float(general_strength), 2)
327
  rm = round(float(motion_strength), 2)
328
-
329
  key_str = f"{pose_lora_path}:{rp}|{general_lora_path}:{rg}|{motion_lora_path}:{rm}"
330
  key = hashlib.sha256(key_str.encode("utf-8")).hexdigest()
331
- cache_path = LORA_CACHE_DIR / f"{key}.pt"
332
 
333
- # If same key already applied, skip
334
- if current_lora_key == key:
335
- print("[LoRA] Key unchanged; skipping rebuild/copy.")
336
- return
337
 
338
- # If cache exists, load the fused state_dict directly
339
- new_state = None
340
- if cache_path.exists():
341
- try:
342
- print("[LoRA] Cache hit: loading fused state_dict from cache.")
343
- new_state = torch.load(cache_path, map_location="cpu")
344
- print("[LoRA] Loaded cached fused state_dict.")
345
- except Exception as e:
346
- print(f"[LoRA] Failed to load cache {cache_path}: {type(e).__name__}: {e}")
347
- new_state = None
348
-
349
- # If no cache, build tmp_ledger and create fused transformer on CPU, save state_dict
350
- if new_state is None:
351
- # Only build if there is at least one non-zero strength
352
- entries = [
353
- (pose_lora_path, rp),
354
- (general_lora_path, rg),
355
- (motion_lora_path, rm),
356
- ]
357
- loras_for_builder = [
358
- LoraPathStrengthAndSDOps(path, strength, LTXV_LORA_COMFY_RENAMING_MAP)
359
- for path, strength in entries
360
- if path is not None and float(strength) != 0.0
361
- ]
362
- if len(loras_for_builder) == 0:
363
- print("[LoRA] No nonzero LoRA strengths — skipping rebuild.")
364
- return
365
 
 
 
 
 
 
 
366
  try:
367
- tmp_ledger = ledger.with_loras(tuple(loras_for_builder))
368
- print(f"[LoRA] Built temporary ledger with {len(loras_for_builder)} LoRA(s).")
 
 
 
 
369
  except Exception as e:
370
- print(f"[LoRA] Failed to create temporary ledger: {type(e).__name__}: {e}")
371
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
- # Build fused transformer on CPU only. Ensure tmp_ledger._target_device is callable.
374
  orig_tmp_target = getattr(tmp_ledger, "_target_device", None)
375
  orig_tmp_device = getattr(tmp_ledger, "device", None)
376
  try:
377
- tmp_ledger._target_device = (lambda: torch.device("cpu"))
378
  tmp_ledger.device = torch.device("cpu")
379
- print("[LoRA] Building fused transformer on CPU (no GPU allocation)...")
380
- new_transformer_cpu = tmp_ledger.transformer() # model on CPU
381
- print("[LoRA] Fused transformer built on CPU.")
382
- except Exception as e:
383
- import traceback
384
- print(f"[LoRA] Error while building fused transformer on CPU: {type(e).__name__}: {e}")
385
- print(traceback.format_exc())
386
- # cleanup
387
- try:
388
- del tmp_ledger
389
- except Exception:
390
- pass
391
- gc.collect()
392
- return
393
  finally:
394
- # restore attributes
395
  if orig_tmp_target is not None:
396
  tmp_ledger._target_device = orig_tmp_target
397
  else:
@@ -407,85 +396,65 @@ def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motio
407
  except Exception:
408
  pass
409
 
410
- # Extract state_dict on CPU and save to cache
411
- try:
412
- new_state = new_transformer_cpu.state_dict()
413
- # Save CPU state_dict for future reuse
414
- try:
415
- torch.save(new_state, cache_path)
416
- print(f"[LoRA] Saved fused state_dict to cache: {cache_path}")
417
- except Exception as e:
418
- print(f"[LoRA] Warning: failed to save cache {cache_path}: {type(e).__name__}: {e}")
419
- except Exception as e:
420
- print(f"[LoRA] Failed to get state_dict from CPU model: {type(e).__name__}: {e}")
421
- new_state = None
 
 
 
 
 
422
 
423
- # Free CPU model and temporary ledger to release memory
424
  try:
425
  del new_transformer_cpu
 
 
 
426
  del tmp_ledger
427
  except Exception:
428
  pass
429
  gc.collect()
430
- torch.cuda.empty_cache()
431
 
432
- if new_state is None:
433
- print("[LoRA] Building fused state failed; aborting.")
434
- return
435
 
436
- # At this point new_state is a CPU state_dict (either from cache or just-built)
437
- try:
438
- # Get existing GPU-resident transformer (cached reference _transformer)
439
- global _transformer
440
- try:
441
- existing_transformer = _transformer
442
- except NameError:
443
- existing_transformer = ledger.transformer()
444
- _transformer = existing_transformer
445
-
446
- existing_params = {name: param for name, param in existing_transformer.named_parameters()}
447
- existing_buffers = {name: buf for name, buf in existing_transformer.named_buffers()}
448
-
449
- # diagnostics: how many keys will be copied
450
- total_keys = len(new_state)
451
- matched = sum(1 for k in new_state if k in existing_params or k in existing_buffers)
452
- print(f"[LoRA] Transformer state keys: total={total_keys} matched_for_copy={matched}")
453
- if matched == 0:
454
- sample_keys = list(new_state.keys())[:10]
455
- print(f"[LoRA] Warning: 0 matching keys found. sample new_state keys: {sample_keys}")
456
-
457
- # Copy CPU tensors into GPU-resident transformer's params/buffers in-place
458
- with torch.no_grad():
459
- for k, v in new_state.items():
460
- if k in existing_params:
461
- tgt = existing_params[k].data
462
- try:
463
- tgt.copy_(v.to(tgt.device))
464
- except Exception as e:
465
- print(f"[LoRA] Failed to copy parameter {k}: {type(e).__name__}: {e}")
466
- elif k in existing_buffers:
467
- tgt = existing_buffers[k].data
468
- try:
469
- tgt.copy_(v.to(tgt.device))
470
- except Exception as e:
471
- print(f"[LoRA] Failed to copy buffer {k}: {type(e).__name__}: {e}")
472
- else:
473
- # name mismatch — skip
474
- pass
475
 
476
- # mark this key as applied
477
- current_lora_key = key
478
- # optional small GC
479
- gc.collect()
480
- torch.cuda.empty_cache()
481
- print("[LoRA] In-place parameter copy complete. LoRAs applied to the existing transformer.")
482
- return
483
 
484
- except Exception as e:
485
- import traceback
486
- print(f"[LoRA] Error during in-place LoRA application (copy stage): {type(e).__name__}: {e}")
487
- print(traceback.format_exc())
488
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
489
 
490
  # ---- REPLACE PRELOAD BLOCK START ----
491
  # Preload all models for ZeroGPU tensor packing.
@@ -648,7 +617,7 @@ def generate_video(
648
 
649
  log_memory("before pipeline call")
650
 
651
- apply_loras_to_pipeline(pose_strength, general_strength, motion_strength)
652
 
653
  video, audio = pipeline(
654
  prompt=prompt,
@@ -729,6 +698,12 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
729
  label="Motion Helper strength",
730
  minimum=0.0, maximum=2.0, value=0.0, step=0.01
731
  )
 
 
 
 
 
 
732
 
733
  with gr.Column():
734
  output_video = gr.Video(label="Generated Video", autoplay=False)
@@ -789,6 +764,12 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
789
  outputs=[width, height],
790
  )
791
 
 
 
 
 
 
 
792
  generate_btn.click(
793
  fn=generate_video,
794
  inputs=[
 
272
# Directory holding cached fused transformer state_dicts, keyed by a sha256
# of the LoRA paths and strengths (see _make_lora_key).
LORA_CACHE_DIR.mkdir(exist_ok=True)
# Key of the LoRA combination currently applied to the live transformer;
# None until a combination has been applied.
current_lora_key: str | None = None

# Handoff slots between the CPU-only prepare step (prepare_lora_cache) and
# the fast apply step (apply_prepared_lora_state_to_pipeline).
PENDING_LORA_KEY: str | None = None
PENDING_LORA_STATE: dict[str, torch.Tensor] | None = None
PENDING_LORA_STATUS: str = "No LoRA state prepared yet."

# Download model assets (huggingface_hub caches these locally).
checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-distilled.safetensors")
spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
gemma_root = snapshot_download(repo_id=GEMMA_REPO)
 
311
  )
312
  # ----------------------------------------------------------------
313
 
314
def _make_lora_key(pose_strength: float, general_strength: float, motion_strength: float) -> tuple[str, str]:
    """Derive a stable cache key for a combination of LoRA strengths.

    Strengths are rounded to two decimals so near-identical slider values
    collapse onto the same key, increasing cache hits.

    Returns:
        (key, key_str): the sha256 hex digest and the human-readable
        string it was computed from.
    """
    lora_paths = (pose_lora_path, general_lora_path, motion_lora_path)
    rounded = (
        round(float(pose_strength), 2),
        round(float(general_strength), 2),
        round(float(motion_strength), 2),
    )
    key_str = "|".join(f"{path}:{strength}" for path, strength in zip(lora_paths, rounded))
    key = hashlib.sha256(key_str.encode("utf-8")).hexdigest()
    return key, key_str
321
 
 
 
 
 
322
 
323
def prepare_lora_cache(
    pose_strength: float,
    general_strength: float,
    motion_strength: float,
    progress=gr.Progress(track_tqdm=True),
):
    """CPU-only LoRA preparation step.

    - checks the on-disk cache for a previously fused transformer state_dict
    - on a hit, loads it; on a miss, builds the fused transformer on CPU
      (no GPU allocation) and saves its state_dict for future reuse
    The resulting state_dict is kept in memory (PENDING_LORA_*) and applied
    later by apply_prepared_lora_state_to_pipeline().

    Returns:
        A human-readable status string for the UI status textbox.
    """
    global PENDING_LORA_KEY, PENDING_LORA_STATE, PENDING_LORA_STATUS

    ledger = pipeline.model_ledger
    key, _ = _make_lora_key(pose_strength, general_strength, motion_strength)
    cache_path = LORA_CACHE_DIR / f"{key}.pt"

    progress(0.05, desc="Preparing LoRA state")

    # Fast path: reuse a previously fused state_dict from disk.
    if cache_path.exists():
        try:
            progress(0.20, desc="Loading cached fused state")
            state = torch.load(cache_path, map_location="cpu")
            PENDING_LORA_KEY = key
            PENDING_LORA_STATE = state
            PENDING_LORA_STATUS = f"Loaded cached LoRA state: {cache_path.name}"
            return PENDING_LORA_STATUS
        except Exception as e:
            # Corrupt or partial cache file: log and fall through to rebuild.
            print(f"[LoRA] Cache load failed: {type(e).__name__}: {e}")

    entries = [
        (pose_lora_path, round(float(pose_strength), 2)),
        (general_lora_path, round(float(general_strength), 2)),
        (motion_lora_path, round(float(motion_strength), 2)),
    ]
    loras_for_builder = [
        LoraPathStrengthAndSDOps(path, strength, LTXV_LORA_COMFY_RENAMING_MAP)
        for path, strength in entries
        if path is not None and float(strength) != 0.0
    ]

    if not loras_for_builder:
        PENDING_LORA_KEY = None
        PENDING_LORA_STATE = None
        PENDING_LORA_STATUS = "No non-zero LoRA strengths selected; nothing to prepare."
        return PENDING_LORA_STATUS

    tmp_ledger = None
    new_transformer_cpu = None
    try:
        progress(0.35, desc="Building fused CPU transformer")
        tmp_ledger = ledger.with_loras(tuple(loras_for_builder))

        # Force the temporary ledger to materialize the fused transformer on
        # CPU (no GPU allocation), then restore its device attributes.
        orig_tmp_target = getattr(tmp_ledger, "_target_device", None)
        orig_tmp_device = getattr(tmp_ledger, "device", None)
        try:
            tmp_ledger._target_device = lambda: torch.device("cpu")
            tmp_ledger.device = torch.device("cpu")
            new_transformer_cpu = tmp_ledger.transformer()
        finally:
            # NOTE(review): the restore logic between the diff's context lines
            # was elided; reconstructed from the previous implementation's
            # restore pattern — confirm against the full file.
            try:
                if orig_tmp_target is not None:
                    tmp_ledger._target_device = orig_tmp_target
                else:
                    delattr(tmp_ledger, "_target_device")
                if orig_tmp_device is not None:
                    tmp_ledger.device = orig_tmp_device
            except Exception:
                pass

        progress(0.70, desc="Extracting fused state_dict")
        state = new_transformer_cpu.state_dict()

        # Caching is best-effort: a failed save (e.g. disk full) must not
        # discard the state we just built in memory.
        try:
            torch.save(state, cache_path)
            status = f"Built and cached LoRA state: {cache_path.name}"
        except Exception as e:
            print(f"[LoRA] Warning: failed to save cache {cache_path}: {type(e).__name__}: {e}")
            status = f"Built LoRA state (cache save failed): {cache_path.name}"

        PENDING_LORA_KEY = key
        PENDING_LORA_STATE = state
        PENDING_LORA_STATUS = status
        return PENDING_LORA_STATUS

    except Exception as e:
        import traceback
        print(f"[LoRA] Prepare failed: {type(e).__name__}: {e}")
        print(traceback.format_exc())
        PENDING_LORA_KEY = None
        PENDING_LORA_STATE = None
        PENDING_LORA_STATUS = f"LoRA prepare failed: {type(e).__name__}: {e}"
        return PENDING_LORA_STATUS

    finally:
        # Release the CPU-side build artifacts regardless of outcome.
        try:
            del new_transformer_cpu
        except Exception:
            pass
        try:
            del tmp_ledger
        except Exception:
            pass
        gc.collect()
 
427
 
 
 
 
428
 
429
def apply_prepared_lora_state_to_pipeline():
    """Fast step: copy the prepared CPU state into the live transformer.

    prepare_lora_cache() does the slow CPU work ahead of time; this function
    only performs in-place parameter/buffer copies and is the only LoRA work
    that remains near generation time.

    Returns:
        True when the prepared state is (or already was) active,
        False when no state has been prepared.
    """
    global current_lora_key, PENDING_LORA_KEY, PENDING_LORA_STATE, _transformer

    if PENDING_LORA_STATE is None or PENDING_LORA_KEY is None:
        print("[LoRA] No prepared LoRA state available; skipping.")
        return False

    if current_lora_key == PENDING_LORA_KEY:
        print("[LoRA] Prepared LoRA state already active; skipping.")
        return True

    # Fall back to building the transformer from the ledger if the preload
    # block has not populated the module-level _transformer reference yet
    # (the previous implementation handled this NameError; keep that safety).
    try:
        existing_transformer = _transformer
    except NameError:
        existing_transformer = pipeline.model_ledger.transformer()
        _transformer = existing_transformer

    existing_params = dict(existing_transformer.named_parameters())
    existing_buffers = dict(existing_transformer.named_buffers())

    # In-place copies keep the GPU-resident module's identity; state keys with
    # no counterpart in the live module are skipped silently.
    with torch.no_grad():
        for name, tensor in PENDING_LORA_STATE.items():
            if name in existing_params:
                target = existing_params[name].data
                target.copy_(tensor.to(target.device))
            elif name in existing_buffers:
                target = existing_buffers[name].data
                target.copy_(tensor.to(target.device))

    current_lora_key = PENDING_LORA_KEY
    print("[LoRA] Prepared LoRA state applied to the pipeline.")
    return True
458
 
459
  # ---- REPLACE PRELOAD BLOCK START ----
460
  # Preload all models for ZeroGPU tensor packing.
 
617
 
618
  log_memory("before pipeline call")
619
 
620
+ apply_prepared_lora_state_to_pipeline()
621
 
622
  video, audio = pipeline(
623
  prompt=prompt,
 
698
  label="Motion Helper strength",
699
  minimum=0.0, maximum=2.0, value=0.0, step=0.01
700
  )
701
+ prepare_lora_btn = gr.Button("Prepare / Load LoRA Cache", variant="secondary")
702
+ lora_status = gr.Textbox(
703
+ label="LoRA Cache Status",
704
+ value="No LoRA state prepared yet.",
705
+ interactive=False,
706
+ )
707
 
708
  with gr.Column():
709
  output_video = gr.Video(label="Generated Video", autoplay=False)
 
764
  outputs=[width, height],
765
  )
766
 
767
+ prepare_lora_btn.click(
768
+ fn=prepare_lora_cache,
769
+ inputs=[pose_strength, general_strength, motion_strength],
770
+ outputs=[lora_status],
771
+ )
772
+
773
  generate_btn.click(
774
  fn=generate_video,
775
  inputs=[