TestingCommit2

Running on Zero

App Files Files Community

dagloop5 committed about 17 hours ago

Commit

cafc416

verified ·

1 Parent(s): 75ef1d3

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -25

app.py CHANGED Viewed

@@ -35,6 +35,7 @@ import tempfile
 from pathlib import Path
 import gc
 import hashlib
 import torch
 torch._dynamo.config.suppress_errors = True
@@ -44,7 +45,6 @@ import spaces
 import gradio as gr
 import numpy as np
 from huggingface_hub import hf_hub_download, snapshot_download
-from safetensors.torch import load_file, save_file
 from safetensors import safe_open
 import json
 import requests
@@ -263,9 +263,14 @@ print("=" * 80)
 print("Downloading LTX-2.3 distilled model + Gemma...")
 print("=" * 80)
-# LoRA cache directory and currently-applied key
-LORA_CACHE_DIR = Path("lora_cache")
-LORA_CACHE_DIR.mkdir(exist_ok=True)
 current_lora_key: str | None = None
 PENDING_LORA_KEY: str | None = None
@@ -425,21 +430,16 @@ def prepare_lora_cache(
     global PENDING_LORA_KEY, PENDING_LORA_STATE, PENDING_LORA_STATUS
     ledger = pipeline.model_ledger
-    key, _ = _make_lora_key(singularity_strength, teneros_strength, sulphur_strength, pose_strength, general_strength, motion_strength, dreamlay_strength, mself_strength, dramatic_strength, fluid_strength, liquid_strength, demopose_strength, voice_strength, realism_strength, transition_strength, physics_strength, reasoning_strength, twostep_strength, mcfurry_strength, dm_strength, praxis_strength, threed_strength, concept_strength, bulge_strength)
-    cache_path = LORA_CACHE_DIR / f"{key}.safetensors"
     progress(0.05, desc="Preparing LoRA state")
-    if cache_path.exists():
-        try:
-            progress(0.20, desc="Loading cached fused state")
-            state = load_file(str(cache_path))
-            PENDING_LORA_KEY = key
-            PENDING_LORA_STATE = state
-            PENDING_LORA_STATUS = f"Loaded cached LoRA state: {cache_path.name}"
-            return PENDING_LORA_STATUS
-        except Exception as e:
-            print(f"[LoRA] Cache load failed: {type(e).__name__}: {e}")
     entries = [
         (singularity_lora_path, round(float(singularity_strength), 2)),
         (teneros_lora_path, round(float(teneros_strength), 2)),
@@ -498,11 +498,9 @@ def prepare_lora_cache(
             k: v.detach().cpu().contiguous()
             for k, v in new_transformer_cpu.state_dict().items()
         }
-        save_file(state, str(cache_path))
         PENDING_LORA_KEY = key
         PENDING_LORA_STATE = state
-        PENDING_LORA_STATUS = f"Built and cached LoRA state: {cache_path.name}"
         return PENDING_LORA_STATUS
     except Exception as e:
@@ -528,19 +526,28 @@ def prepare_lora_cache(
 def apply_prepared_lora_state_to_pipeline():
     """
-    Fast step: copy the already prepared CPU state into the live transformer.
-    This is the only part that should remain near generation time.
     """
-    global current_lora_key, PENDING_LORA_KEY, PENDING_LORA_STATE
-    if PENDING_LORA_STATE is None or PENDING_LORA_KEY is None:
-        print("[LoRA] No prepared LoRA state available; skipping.")
         return False
     if current_lora_key == PENDING_LORA_KEY:
         print("[LoRA] Prepared LoRA state already active; skipping.")
         return True
     existing_transformer = _transformer
     with torch.no_grad():
         missing, unexpected = existing_transformer.load_state_dict(PENDING_LORA_STATE, strict=False)
@@ -548,6 +555,11 @@ def apply_prepared_lora_state_to_pipeline():
             print(f"[LoRA] load_state_dict mismatch: missing={len(missing)}, unexpected={len(unexpected)}")
     current_lora_key = PENDING_LORA_KEY
     print("[LoRA] Prepared LoRA state applied to the pipeline.")
     return True

 from pathlib import Path
 import gc
 import hashlib
+import shutil
 import torch
 torch._dynamo.config.suppress_errors = True
 import gradio as gr
 import numpy as np
 from huggingface_hub import hf_hub_download, snapshot_download
 from safetensors import safe_open
 import json
 import requests
 print("Downloading LTX-2.3 distilled model + Gemma...")
 print("=" * 80)
+# Legacy on-disk LoRA cache is no longer used; delete any old copies so the
+# Space does not run out of persistent storage and reset.
+_legacy_lora_cache_dir = Path("lora_cache")
+if _legacy_lora_cache_dir.exists():
+    shutil.rmtree(_legacy_lora_cache_dir, ignore_errors=True)
+# Only the currently-loaded key and a single transient CPU-side pending state
+# are kept. The pending state is deleted as soon as it is copied to the GPU.
 current_lora_key: str | None = None
 PENDING_LORA_KEY: str | None = None
     global PENDING_LORA_KEY, PENDING_LORA_STATE, PENDING_LORA_STATUS
     ledger = pipeline.model_ledger
+    key, _ = _make_lora_key(
+        singularity_strength, teneros_strength, sulphur_strength, pose_strength,
+        general_strength, motion_strength, dreamlay_strength, mself_strength,
+        dramatic_strength, fluid_strength, liquid_strength, demopose_strength,
+        voice_strength, realism_strength, transition_strength, physics_strength,
+        reasoning_strength, twostep_strength, mcfurry_strength, dm_strength,
+        praxis_strength, threed_strength, concept_strength, bulge_strength,
+    )
     progress(0.05, desc="Preparing LoRA state")
     entries = [
         (singularity_lora_path, round(float(singularity_strength), 2)),
         (teneros_lora_path, round(float(teneros_strength), 2)),
             k: v.detach().cpu().contiguous()
             for k, v in new_transformer_cpu.state_dict().items()
         }
         PENDING_LORA_KEY = key
         PENDING_LORA_STATE = state
+        PENDING_LORA_STATUS = "Built LoRA state (ready to apply)."
         return PENDING_LORA_STATUS
     except Exception as e:
 def apply_prepared_lora_state_to_pipeline():
     """
+    Fast ZeroGPU step: copy the already prepared CPU state into the live
+    transformer, then immediately free the CPU-side copy.
     """
+    global current_lora_key, PENDING_LORA_KEY, PENDING_LORA_STATE, PENDING_LORA_STATUS
+    if PENDING_LORA_KEY is None:
+        print("[LoRA] No prepared LoRA key available; skipping.")
         return False
+    # The same LoRA weights are already loaded into _transformer; just make
+    # sure the CPU copy is not wasting RAM.
     if current_lora_key == PENDING_LORA_KEY:
+        if PENDING_LORA_STATE is not None:
+            PENDING_LORA_STATE = None
+            PENDING_LORA_STATUS = "LoRA state already active; CPU copy freed."
         print("[LoRA] Prepared LoRA state already active; skipping.")
         return True
+    if PENDING_LORA_STATE is None:
+        print("[LoRA] LoRA key changed but no CPU state is available; skipping.")
+        return False
     existing_transformer = _transformer
     with torch.no_grad():
         missing, unexpected = existing_transformer.load_state_dict(PENDING_LORA_STATE, strict=False)
             print(f"[LoRA] load_state_dict mismatch: missing={len(missing)}, unexpected={len(unexpected)}")
     current_lora_key = PENDING_LORA_KEY
+    # The weights now live in the live transformer, so drop the CPU copy to
+    # avoid holding an extra full transformer in RAM.
+    PENDING_LORA_STATE = None
+    PENDING_LORA_STATUS = "LoRA state applied to pipeline."
     print("[LoRA] Prepared LoRA state applied to the pipeline.")
     return True