Spaces:
Paused
Paused
Reduce ONNX Runtime (ORT) memory: disable prepacking, use basic graph optimization, 1 inter-op thread
Browse files
app.py
CHANGED
|
@@ -50,12 +50,15 @@ def _load_all():
|
|
| 50 |
print("[init] Downloading VAE...")
|
| 51 |
vae_path = _download(repo, "vae_full.pt", MODELS_DIR, token)
|
| 52 |
|
| 53 |
-
print("[init] Creating ONNX Runtime session...")
|
| 54 |
t0 = time.time()
|
| 55 |
opts = ort.SessionOptions()
|
| 56 |
-
opts.graph_optimization_level = ort.GraphOptimizationLevel.
|
| 57 |
-
opts.inter_op_num_threads =
|
| 58 |
opts.intra_op_num_threads = 2
|
|
|
|
|
|
|
|
|
|
| 59 |
dit_session = ort.InferenceSession(onnx_path, opts, providers=["CPUExecutionProvider"])
|
| 60 |
print(f"[init] DiT session ready in {time.time() - t0:.0f}s")
|
| 61 |
|
|
|
|
| 50 |
print("[init] Downloading VAE...")
|
| 51 |
vae_path = _download(repo, "vae_full.pt", MODELS_DIR, token)
|
| 52 |
|
| 53 |
+
print("[init] Creating ONNX Runtime session (mmap + low memory)...")
|
| 54 |
t0 = time.time()
|
| 55 |
opts = ort.SessionOptions()
|
| 56 |
+
opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
|
| 57 |
+
opts.inter_op_num_threads = 1
|
| 58 |
opts.intra_op_num_threads = 2
|
| 59 |
+
opts.enable_mem_pattern = True
|
| 60 |
+
opts.enable_mem_reuse = True
|
| 61 |
+
opts.add_session_config_entry("session.disable_prepacking", "1")
|
| 62 |
dit_session = ort.InferenceSession(onnx_path, opts, providers=["CPUExecutionProvider"])
|
| 63 |
print(f"[init] DiT session ready in {time.time() - t0:.0f}s")
|
| 64 |
|