Nekochu committed on
Commit
86547e5
·
1 Parent(s): 807e510

Hard-evict caches when loading 1B variants

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -233,19 +233,21 @@ def _get_dense_model(task: str, size: str):
233
  os.makedirs(local_dir, exist_ok=True)
234
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
235
 
236
- # If a 5B ORT session (5-6 GB) is resident, evict it before init_model allocates the 1B dense weights.
237
- if size == "1b" and _ORT_SESSIONS:
 
 
 
 
238
  _ORT_SESSIONS.clear()
239
- import gc
240
  gc.collect()
 
 
 
 
 
241
 
242
  model = init_model(config, ckpt, device="cpu")
243
-
244
- while len(_MODELS) >= _MAX_CACHED:
245
- oldest = next(iter(_MODELS))
246
- del _MODELS[oldest]
247
- import gc
248
- gc.collect()
249
  _MODELS[key] = model
250
  return model
251
 
@@ -280,11 +282,15 @@ def _get_pose_model(size: str):
280
  os.makedirs(local_dir, exist_ok=True)
281
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
282
 
283
- # Drop a resident 5B ORT session before allocating the 1B pose weights.
284
- if size == "1b" and _ORT_SESSIONS:
 
 
 
285
  _ORT_SESSIONS.clear()
286
- import gc
287
- gc.collect()
 
288
 
289
  model = init_model(config, ckpt, device="cpu")
290
 
@@ -293,12 +299,6 @@ def _get_pose_model(size: str):
293
  model.codec = UDPHeatmap(**codec_cfg)
294
  model.pose_metainfo = _get_pose_metainfo()
295
 
296
- # Free the largest cached dense model first if more than one pose model present.
297
- while len(_POSE_MODELS) >= 1:
298
- oldest = next(iter(_POSE_MODELS))
299
- del _POSE_MODELS[oldest]
300
- import gc
301
- gc.collect()
302
  _POSE_MODELS[key] = model
303
  return model
304
 
 
233
  os.makedirs(local_dir, exist_ok=True)
234
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
235
 
236
+ # cpu-basic has 16 GB of RAM. Loading a 1B dense model (~6 GB fp32) on top of cached 0.8b/0.4b dense models (~5 GB each), a 1B pose model, and DETR runs out of memory.
237
+ # So before init_model allocates the 1B weights, evict ALL caches that would compete with them for memory.
238
+ import gc
239
+ if size == "1b":
240
+ _MODELS.clear()
241
+ _POSE_MODELS.clear()
242
  _ORT_SESSIONS.clear()
 
243
  gc.collect()
244
+ else:
245
+ while len(_MODELS) >= _MAX_CACHED:
246
+ oldest = next(iter(_MODELS))
247
+ del _MODELS[oldest]
248
+ gc.collect()
249
 
250
  model = init_model(config, ckpt, device="cpu")
 
 
 
 
 
 
251
  _MODELS[key] = model
252
  return model
253
 
 
282
  os.makedirs(local_dir, exist_ok=True)
283
  ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
284
 
285
+ # Same hard eviction as the dense 1B path: clear every other resident model before init_model allocates.
286
+ import gc
287
+ if size == "1b":
288
+ _MODELS.clear()
289
+ _POSE_MODELS.clear()
290
  _ORT_SESSIONS.clear()
291
+ else:
292
+ _POSE_MODELS.clear() # cap=1
293
+ gc.collect()
294
 
295
  model = init_model(config, ckpt, device="cpu")
296
 
 
299
  model.codec = UDPHeatmap(**codec_cfg)
300
  model.pose_metainfo = _get_pose_metainfo()
301
 
 
 
 
 
 
 
302
  _POSE_MODELS[key] = model
303
  return model
304