Testing3

Runtime error

App Files Files Community

dagloop5 committed on Mar 18

Commit

9921524

verified ·

1 Parent(s): 2ec52b6

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -42

app.py CHANGED Viewed

@@ -32,6 +32,7 @@ import logging
 import random
 import tempfile
 from pathlib import Path
 import torch
 torch._dynamo.config.suppress_errors = True
@@ -302,26 +303,22 @@ pipeline = LTX23DistilledA2VPipeline(
 def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motion_strength: float):
     """
-    Build a temporary ModelLedger configured with the requested LoRAs, build the transformer,
-    and hot-swap it into the existing ledger without recreating the pipeline object.
-    Strategy:
-      1. Construct LoraPathStrengthAndSDOps entries for any non-zero strengths.
-      2. Use ledger.with_loras(...) to get a temporary ledger configured with those loras.
-      3. Optionally clear the existing cached transformer to reduce peak VRAM, then build the
-         transformer from the temporary ledger and hot-swap it into the live ledger.
-      4. If anything fails, print diagnostics and leave the existing pipeline in place.
     """
     ledger = pipeline.model_ledger
-    # Build convenience list and convert to the LTX primitive type (with sd_ops)
     entries = [
         (pose_lora_path, float(pose_strength)),
         (general_lora_path, float(general_strength)),
         (motion_lora_path, float(motion_strength)),
     ]
-    # Keep only nonzero strengths and valid paths (zero == disabled)
     loras_for_builder = [
         LoraPathStrengthAndSDOps(path, strength, LTXV_LORA_COMFY_RENAMING_MAP)
         for path, strength in entries
@@ -333,53 +330,83 @@ def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motio
         return
     try:
-        # Create a temporary ledger configured with the extra LoRAs.
-        # with_loras accepts an iterable of LoraPathStrengthAndSDOps.
         tmp_ledger = ledger.with_loras(tuple(loras_for_builder))
         print(f"[LoRA] Built temporary ledger with {len(loras_for_builder)} LoRA(s).")
-        # Attempt to free previously cached transformer instance to reduce peak VRAM.
-        # (We cached instances earlier and replaced ledger.<component> with lambdas returning them.)
         try:
-            # If ModelLedger implements clear_vram, call it to release cached GPU tensors.
-            if hasattr(ledger, "clear_vram"):
-                ledger.clear_vram()
-                print("[LoRA] Cleared old ledger VRAM cache before building new transformer.")
-        except Exception as e:
-            print(f"[LoRA] Warning: ledger.clear_vram() failed: {type(e).__name__}: {e}")
-        # Build the new transformer from the temporary ledger (this will load & fuse LoRAs).
-        print("[LoRA] Building transformer from temporary ledger (this may take time / spike VRAM)...")
-        new_transformer = tmp_ledger.transformer()  # returns an X0Model moved to device
-        print("[LoRA] New transformer built successfully.")
-        # Replace cached transformer instance and hot-swap ledger.transformer to return the new one.
         global _transformer
         try:
-            # Remove old Python ref if present to allow GC.
-            del _transformer
         except Exception:
             pass
         torch.cuda.empty_cache()
-        _transformer = new_transformer
-        ledger.transformer = lambda: _transformer
-        print("[LoRA] Hot-swapped new transformer into ledger successfully.")
-        # Done
         return
     except Exception as e:
         import traceback
-        print(f"[LoRA] Error during builder-based LoRA application: {type(e).__name__}: {e}")
         print(traceback.format_exc())
-    # Final fallback (should rarely hit if above path works):
-    try:
-        print("[LoRA] Falling back to pipeline.loras attribute assignment (best-effort).")
-        pipeline.loras = [(p, float(s)) for p, s in entries if p is not None]
-    except Exception as e:
-        print(f"[LoRA] Fallback pipeline.loras assignment failed: {type(e).__name__}: {e}")
-    print("[LoRA] apply_loras_to_pipeline finished (some approaches may not have taken effect).")
 # ---- REPLACE PRELOAD BLOCK START ----
 # Preload all models for ZeroGPU tensor packing.

 import random
 import tempfile
 from pathlib import Path
+import gc
 import torch
 torch._dynamo.config.suppress_errors = True
 def apply_loras_to_pipeline(pose_strength: float, general_strength: float, motion_strength: float):
     """
+    Apply LoRAs by:
+      1) creating a temporary ledger with requested LoRAs,
+      2) building the fused transformer on CPU only,
+      3) copying parameters & buffers in-place into the existing GPU transformer,
+      4) freeing CPU objects and clearing cache.
+    This avoids having two full transformers on GPU simultaneously.
     """
     ledger = pipeline.model_ledger
     entries = [
         (pose_lora_path, float(pose_strength)),
         (general_lora_path, float(general_strength)),
         (motion_lora_path, float(motion_strength)),
     ]
+    # Build LoraPathStrengthAndSDOps for non-zero strengths
     loras_for_builder = [
         LoraPathStrengthAndSDOps(path, strength, LTXV_LORA_COMFY_RENAMING_MAP)
         for path, strength in entries
         return
     try:
+        # Create temporary ledger configured with LoRAs
         tmp_ledger = ledger.with_loras(tuple(loras_for_builder))
         print(f"[LoRA] Built temporary ledger with {len(loras_for_builder)} LoRA(s).")
+        # Force the temporary ledger to build on CPU so the fused model is built on CPU.
+        # Save original attributes to restore them later.
+        orig_tmp_target = getattr(tmp_ledger, "_target_device", None)
+        orig_tmp_device = getattr(tmp_ledger, "device", None)
         try:
+            tmp_ledger._target_device = torch.device("cpu")
+            tmp_ledger.device = torch.device("cpu")
+            print("[LoRA] Building fused transformer on CPU (no GPU allocation)...")
+            new_transformer_cpu = tmp_ledger.transformer()  # returns model on CPU now
+            print("[LoRA] Fused transformer built on CPU.")
+        finally:
+            # Restore attributes (defensive)
+            if orig_tmp_target is not None:
+                tmp_ledger._target_device = orig_tmp_target
+            if orig_tmp_device is not None:
+                tmp_ledger.device = orig_tmp_device
+        # Get the existing transformer instance (the one currently used by the pipeline).
         global _transformer
         try:
+            existing_transformer = _transformer
+        except NameError:
+            # If not cached, ask ledger for it (this will be the GPU-resident model already loaded).
+            existing_transformer = ledger.transformer()
+            _transformer = existing_transformer
+        # Map existing parameters & buffers for quick lookup
+        existing_params = {name: param for name, param in existing_transformer.named_parameters()}
+        existing_buffers = {name: buf for name, buf in existing_transformer.named_buffers()}
+        # State dict of CPU model (fused with LoRAs)
+        new_state = new_transformer_cpu.state_dict()
+        # Copy CPU tensors into the GPU-resident transformer's params/buffers in-place
+        with torch.no_grad():
+            for k, v in new_state.items():
+                if k in existing_params:
+                    tgt = existing_params[k].data
+                    try:
+                        tgt.copy_(v.to(tgt.device))
+                    except Exception as e:
+                        print(f"[LoRA] Failed to copy parameter {k}: {type(e).__name__}: {e}")
+                elif k in existing_buffers:
+                    tgt = existing_buffers[k].data
+                    try:
+                        tgt.copy_(v.to(tgt.device))
+                    except Exception as e:
+                        print(f"[LoRA] Failed to copy buffer {k}: {type(e).__name__}: {e}")
+                else:
+                    # Parameter name mismatch — skip
+                    # This can happen if LoRA changes expected keys; not fatal.
+                    # Unmatched keys are currently skipped silently (no debug output is printed).
+                    pass
+        # Free CPU-built transformer and temporary ledger resources, then clear caches
+        try:
+            del new_transformer_cpu
+            del tmp_ledger
         except Exception:
             pass
+        gc.collect()
         torch.cuda.empty_cache()
+        print("[LoRA] In-place parameter copy complete. LoRAs applied to the existing transformer.")
         return
     except Exception as e:
         import traceback
+        print(f"[LoRA] Error during in-place LoRA application: {type(e).__name__}: {e}")
         print(traceback.format_exc())
+    # If something unexpectedly failed, bail out (no fallback).
+    print("[LoRA] apply_loras_to_pipeline finished (LOADING FAILED — no changes applied).")
 # ---- REPLACE PRELOAD BLOCK START ----
 # Preload all models for ZeroGPU tensor packing.