Nekochu committed on
Commit
10d0786
·
verified ·
1 Parent(s): 1620b49

Reduce ORT memory: disable prepacking, basic graph optimization, 1 inter-op thread

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -50,12 +50,15 @@ def _load_all():
50
  print("[init] Downloading VAE...")
51
  vae_path = _download(repo, "vae_full.pt", MODELS_DIR, token)
52
 
53
- print("[init] Creating ONNX Runtime session...")
54
  t0 = time.time()
55
  opts = ort.SessionOptions()
56
- opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
57
- opts.inter_op_num_threads = 2
58
  opts.intra_op_num_threads = 2
 
 
 
59
  dit_session = ort.InferenceSession(onnx_path, opts, providers=["CPUExecutionProvider"])
60
  print(f"[init] DiT session ready in {time.time() - t0:.0f}s")
61
 
 
50
  print("[init] Downloading VAE...")
51
  vae_path = _download(repo, "vae_full.pt", MODELS_DIR, token)
52
 
53
+ print("[init] Creating ONNX Runtime session (mmap + low memory)...")
54
  t0 = time.time()
55
  opts = ort.SessionOptions()
56
+ opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
57
+ opts.inter_op_num_threads = 1
58
  opts.intra_op_num_threads = 2
59
+ opts.enable_mem_pattern = True
60
+ opts.enable_mem_reuse = True
61
+ opts.add_session_config_entry("session.disable_prepacking", "1")
62
  dit_session = ort.InferenceSession(onnx_path, opts, providers=["CPUExecutionProvider"])
63
  print(f"[init] DiT session ready in {time.time() - t0:.0f}s")
64