dagloop5 committed on
Commit
b7c6b7f
·
verified ·
1 Parent(s): ae1a1ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -54
app.py CHANGED
@@ -266,6 +266,11 @@ print("=" * 80)
266
  print("Downloading LTX-2.3 distilled model + Gemma...")
267
  print("=" * 80)
268
 
 
 
 
 
 
269
  checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-distilled.safetensors")
270
  spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
271
  gemma_root = snapshot_download(repo_id=GEMMA_REPO)
@@ -303,56 +308,92 @@ pipeline = LTX23DistilledA2VPipeline(
303
 
304
  def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motion_strength: float):
305
  """
306
- Apply LoRAs by:
307
- 1) creating a temporary ledger with requested LoRAs,
308
- 2) building the fused transformer on CPU only,
309
- 3) copying parameters & buffers in-place into the existing GPU transformer,
310
- 4) freeing CPU objects and clearing cache.
311
- This avoids having two full transformers on GPU simultaneously.
 
 
312
  """
313
  ledger = pipeline.model_ledger
 
 
 
 
 
 
314
 
315
- entries = [
316
- (pose_lora_path, float(pose_strength)),
317
- (general_lora_path, float(general_strength)),
318
- (motion_lora_path, float(motion_strength)),
319
- ]
320
-
321
- # Build LoraPathStrengthAndSDOps for non-zero strengths
322
- loras_for_builder = [
323
- LoraPathStrengthAndSDOps(path, strength, LTXV_LORA_COMFY_RENAMING_MAP)
324
- for path, strength in entries
325
- if path is not None and float(strength) != 0.0
326
- ]
327
-
328
- if len(loras_for_builder) == 0:
329
- print("[LoRA] No nonzero LoRA strengths — skipping rebuild.")
330
  return
331
 
332
- try:
333
- # Create temporary ledger configured with LoRAs
334
- tmp_ledger = ledger.with_loras(tuple(loras_for_builder))
335
- print(f"[LoRA] Built temporary ledger with {len(loras_for_builder)} LoRA(s).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
- # Force the temporary ledger to build on CPU so the fused model is built on CPU.
338
- # Save original attributes to restore them later.
 
 
 
 
 
 
339
  orig_tmp_target = getattr(tmp_ledger, "_target_device", None)
340
  orig_tmp_device = getattr(tmp_ledger, "device", None)
341
  try:
342
- # _target_device is expected to be callable by model_ledger.transformer()
343
- # set it to a callable that returns CPU so builder.build(device=...) works.
344
  tmp_ledger._target_device = (lambda: torch.device("cpu"))
345
- # ledger.device is used after build: set it to CPU so .to(self.device) keeps the model on CPU.
346
  tmp_ledger.device = torch.device("cpu")
347
  print("[LoRA] Building fused transformer on CPU (no GPU allocation)...")
348
- new_transformer_cpu = tmp_ledger.transformer() # should now return a CPU model
349
  print("[LoRA] Fused transformer built on CPU.")
 
 
 
 
 
 
 
 
 
 
 
350
  finally:
351
- # Restore attributes to their previous values (if there were any).
352
  if orig_tmp_target is not None:
353
  tmp_ledger._target_device = orig_tmp_target
354
  else:
355
- # remove attribute if ledger did not have it previously
356
  try:
357
  delattr(tmp_ledger, "_target_device")
358
  except Exception:
@@ -365,31 +406,54 @@ def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motio
365
  except Exception:
366
  pass
367
 
368
- # Get the existing transformer instance (the one currently used by the pipeline).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  global _transformer
370
  try:
371
  existing_transformer = _transformer
372
  except NameError:
373
- # If not cached, ask ledger for it (this will be the GPU-resident model already loaded).
374
  existing_transformer = ledger.transformer()
375
  _transformer = existing_transformer
376
 
377
- # Map existing parameters & buffers for quick lookup
378
  existing_params = {name: param for name, param in existing_transformer.named_parameters()}
379
  existing_buffers = {name: buf for name, buf in existing_transformer.named_buffers()}
380
 
381
- # State dict of CPU model (fused with LoRAs)
382
- new_state = new_transformer_cpu.state_dict()
383
  # diagnostics: how many keys will be copied
384
  total_keys = len(new_state)
385
  matched = sum(1 for k in new_state if k in existing_params or k in existing_buffers)
386
  print(f"[LoRA] Transformer state keys: total={total_keys} matched_for_copy={matched}")
387
  if matched == 0:
388
- # helpful hint if naming differs
389
  sample_keys = list(new_state.keys())[:10]
390
  print(f"[LoRA] Warning: 0 matching keys found. sample new_state keys: {sample_keys}")
391
 
392
- # Copy CPU tensors into the GPU-resident transformer's params/buffers in-place
393
  with torch.no_grad():
394
  for k, v in new_state.items():
395
  if k in existing_params:
@@ -405,30 +469,22 @@ def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motio
405
  except Exception as e:
406
  print(f"[LoRA] Failed to copy buffer {k}: {type(e).__name__}: {e}")
407
  else:
408
- # Parameter name mismatch — skip
409
- # This can happen if LoRA changes expected keys; not fatal.
410
- # Print debug once for the first few unmatched keys.
411
  pass
412
 
413
- # Free CPU-built transformer and temporary ledger resources, then clear caches
414
- try:
415
- del new_transformer_cpu
416
- del tmp_ledger
417
- except Exception:
418
- pass
419
  gc.collect()
420
  torch.cuda.empty_cache()
421
-
422
  print("[LoRA] In-place parameter copy complete. LoRAs applied to the existing transformer.")
423
  return
424
 
425
  except Exception as e:
426
  import traceback
427
- print(f"[LoRA] Error during in-place LoRA application: {type(e).__name__}: {e}")
428
  print(traceback.format_exc())
429
-
430
- # If something unexpectedly failed, bail out (no fallback).
431
- print("[LoRA] apply_loras_to_pipeline finished (LOADING FAILED — no changes applied).")
432
 
433
  # ---- REPLACE PRELOAD BLOCK START ----
434
  # Preload all models for ZeroGPU tensor packing.
 
266
  print("Downloading LTX-2.3 distilled model + Gemma...")
267
  print("=" * 80)
268
 
269
+ # LoRA cache directory and currently-applied key
270
+ LORA_CACHE_DIR = Path("lora_cache")
271
+ LORA_CACHE_DIR.mkdir(exist_ok=True)
272
+ current_lora_key: str | None = None
273
+
274
  checkpoint_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-22b-distilled.safetensors")
275
  spatial_upsampler_path = hf_hub_download(repo_id=LTX_MODEL_REPO, filename="ltx-2.3-spatial-upscaler-x2-1.0.safetensors")
276
  gemma_root = snapshot_download(repo_id=GEMMA_REPO)
 
308
 
309
  def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motion_strength: float):
310
  """
311
+ Apply LoRAs using cached fused state_dicts when available, otherwise
312
+ build fused transformer on CPU, save its state_dict to cache, and then
313
+ copy parameters in-place into the GPU-resident transformer.
314
+
315
+ Caching key:
316
+ sha256( f"{pose_path}:{round(pose,2)}|{general_path}:{round(general,2)}|{motion_path}:{round(motion,2)}" )
317
+
318
+ Rounding to 2 decimals reduces unique keys and increases cache hits.
319
  """
320
  ledger = pipeline.model_ledger
321
+ global current_lora_key, LORA_CACHE_DIR
322
+
323
+ # Round strengths to 2 decimals for cache stability
324
+ rp = round(float(pose_strength), 2)
325
+ rg = round(float(general_strength), 2)
326
+ rm = round(float(motion_strength), 2)
327
 
328
+ key_str = f"{pose_lora_path}:{rp}|{general_lora_path}:{rg}|{motion_lora_path}:{rm}"
329
+ key = hashlib.sha256(key_str.encode("utf-8")).hexdigest()
330
+ cache_path = LORA_CACHE_DIR / f"{key}.pt"
331
+
332
+ # If same key already applied, skip
333
+ if current_lora_key == key:
334
+ print("[LoRA] Key unchanged; skipping rebuild/copy.")
 
 
 
 
 
 
 
 
335
  return
336
 
337
+ # If cache exists, load the fused state_dict directly
338
+ new_state = None
339
+ if cache_path.exists():
340
+ try:
341
+ print("[LoRA] Cache hit: loading fused state_dict from cache.")
342
+ new_state = torch.load(cache_path, map_location="cpu")
343
+ print("[LoRA] Loaded cached fused state_dict.")
344
+ except Exception as e:
345
+ print(f"[LoRA] Failed to load cache {cache_path}: {type(e).__name__}: {e}")
346
+ new_state = None
347
+
348
+ # If no cache, build tmp_ledger and create fused transformer on CPU, save state_dict
349
+ if new_state is None:
350
+ # Only build if there is at least one non-zero strength
351
+ entries = [
352
+ (pose_lora_path, rp),
353
+ (general_lora_path, rg),
354
+ (motion_lora_path, rm),
355
+ ]
356
+ loras_for_builder = [
357
+ LoraPathStrengthAndSDOps(path, strength, LTXV_LORA_COMFY_RENAMING_MAP)
358
+ for path, strength in entries
359
+ if path is not None and float(strength) != 0.0
360
+ ]
361
+ if len(loras_for_builder) == 0:
362
+ print("[LoRA] No nonzero LoRA strengths — skipping rebuild.")
363
+ return
364
 
365
+ try:
366
+ tmp_ledger = ledger.with_loras(tuple(loras_for_builder))
367
+ print(f"[LoRA] Built temporary ledger with {len(loras_for_builder)} LoRA(s).")
368
+ except Exception as e:
369
+ print(f"[LoRA] Failed to create temporary ledger: {type(e).__name__}: {e}")
370
+ return
371
+
372
+ # Build fused transformer on CPU only. Ensure tmp_ledger._target_device is callable.
373
  orig_tmp_target = getattr(tmp_ledger, "_target_device", None)
374
  orig_tmp_device = getattr(tmp_ledger, "device", None)
375
  try:
 
 
376
  tmp_ledger._target_device = (lambda: torch.device("cpu"))
 
377
  tmp_ledger.device = torch.device("cpu")
378
  print("[LoRA] Building fused transformer on CPU (no GPU allocation)...")
379
+ new_transformer_cpu = tmp_ledger.transformer() # model on CPU
380
  print("[LoRA] Fused transformer built on CPU.")
381
+ except Exception as e:
382
+ import traceback
383
+ print(f"[LoRA] Error while building fused transformer on CPU: {type(e).__name__}: {e}")
384
+ print(traceback.format_exc())
385
+ # cleanup
386
+ try:
387
+ del tmp_ledger
388
+ except Exception:
389
+ pass
390
+ gc.collect()
391
+ return
392
  finally:
393
+ # restore attributes
394
  if orig_tmp_target is not None:
395
  tmp_ledger._target_device = orig_tmp_target
396
  else:
 
397
  try:
398
  delattr(tmp_ledger, "_target_device")
399
  except Exception:
 
406
  except Exception:
407
  pass
408
 
409
+ # Extract state_dict on CPU and save to cache
410
+ try:
411
+ new_state = new_transformer_cpu.state_dict()
412
+ # Save CPU state_dict for future reuse
413
+ try:
414
+ torch.save(new_state, cache_path)
415
+ print(f"[LoRA] Saved fused state_dict to cache: {cache_path}")
416
+ except Exception as e:
417
+ print(f"[LoRA] Warning: failed to save cache {cache_path}: {type(e).__name__}: {e}")
418
+ except Exception as e:
419
+ print(f"[LoRA] Failed to get state_dict from CPU model: {type(e).__name__}: {e}")
420
+ new_state = None
421
+
422
+ # Free CPU model and temporary ledger to release memory
423
+ try:
424
+ del new_transformer_cpu
425
+ del tmp_ledger
426
+ except Exception:
427
+ pass
428
+ gc.collect()
429
+ torch.cuda.empty_cache()
430
+
431
+ if new_state is None:
432
+ print("[LoRA] Building fused state failed; aborting.")
433
+ return
434
+
435
+ # At this point new_state is a CPU state_dict (either from cache or just-built)
436
+ try:
437
+ # Get existing GPU-resident transformer (cached reference _transformer)
438
  global _transformer
439
  try:
440
  existing_transformer = _transformer
441
  except NameError:
 
442
  existing_transformer = ledger.transformer()
443
  _transformer = existing_transformer
444
 
 
445
  existing_params = {name: param for name, param in existing_transformer.named_parameters()}
446
  existing_buffers = {name: buf for name, buf in existing_transformer.named_buffers()}
447
 
 
 
448
  # diagnostics: how many keys will be copied
449
  total_keys = len(new_state)
450
  matched = sum(1 for k in new_state if k in existing_params or k in existing_buffers)
451
  print(f"[LoRA] Transformer state keys: total={total_keys} matched_for_copy={matched}")
452
  if matched == 0:
 
453
  sample_keys = list(new_state.keys())[:10]
454
  print(f"[LoRA] Warning: 0 matching keys found. sample new_state keys: {sample_keys}")
455
 
456
+ # Copy CPU tensors into GPU-resident transformer's params/buffers in-place
457
  with torch.no_grad():
458
  for k, v in new_state.items():
459
  if k in existing_params:
 
469
  except Exception as e:
470
  print(f"[LoRA] Failed to copy buffer {k}: {type(e).__name__}: {e}")
471
  else:
472
+ # name mismatch — skip
 
 
473
  pass
474
 
475
+ # mark this key as applied
476
+ current_lora_key = key
477
+ # optional small GC
 
 
 
478
  gc.collect()
479
  torch.cuda.empty_cache()
 
480
  print("[LoRA] In-place parameter copy complete. LoRAs applied to the existing transformer.")
481
  return
482
 
483
  except Exception as e:
484
  import traceback
485
+ print(f"[LoRA] Error during in-place LoRA application (copy stage): {type(e).__name__}: {e}")
486
  print(traceback.format_exc())
487
+ return
 
 
488
 
489
  # ---- REPLACE PRELOAD BLOCK START ----
490
  # Preload all models for ZeroGPU tensor packing.