Spaces:
Running
Running
Hard-evict caches when loading 1B variants
Browse files
app.py
CHANGED
|
@@ -233,19 +233,21 @@ def _get_dense_model(task: str, size: str):
|
|
| 233 |
os.makedirs(local_dir, exist_ok=True)
|
| 234 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 235 |
|
| 236 |
-
#
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
_ORT_SESSIONS.clear()
|
| 239 |
-
import gc
|
| 240 |
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
model = init_model(config, ckpt, device="cpu")
|
| 243 |
-
|
| 244 |
-
while len(_MODELS) >= _MAX_CACHED:
|
| 245 |
-
oldest = next(iter(_MODELS))
|
| 246 |
-
del _MODELS[oldest]
|
| 247 |
-
import gc
|
| 248 |
-
gc.collect()
|
| 249 |
_MODELS[key] = model
|
| 250 |
return model
|
| 251 |
|
|
@@ -280,11 +282,15 @@ def _get_pose_model(size: str):
|
|
| 280 |
os.makedirs(local_dir, exist_ok=True)
|
| 281 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 282 |
|
| 283 |
-
#
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
| 285 |
_ORT_SESSIONS.clear()
|
| 286 |
-
|
| 287 |
-
|
|
|
|
| 288 |
|
| 289 |
model = init_model(config, ckpt, device="cpu")
|
| 290 |
|
|
@@ -293,12 +299,6 @@ def _get_pose_model(size: str):
|
|
| 293 |
model.codec = UDPHeatmap(**codec_cfg)
|
| 294 |
model.pose_metainfo = _get_pose_metainfo()
|
| 295 |
|
| 296 |
-
# Free the largest cached dense model first if more than one pose model present.
|
| 297 |
-
while len(_POSE_MODELS) >= 1:
|
| 298 |
-
oldest = next(iter(_POSE_MODELS))
|
| 299 |
-
del _POSE_MODELS[oldest]
|
| 300 |
-
import gc
|
| 301 |
-
gc.collect()
|
| 302 |
_POSE_MODELS[key] = model
|
| 303 |
return model
|
| 304 |
|
|
|
|
| 233 |
os.makedirs(local_dir, exist_ok=True)
|
| 234 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 235 |
|
| 236 |
+
# cpu-basic has 16 GB of RAM. Loading a 1B dense model (~6 GB fp32) on top of cached 0.8b/0.4b dense models (~5 GB each), a 1B pose model, and DETR runs out of memory (OOM).
|
| 237 |
+
# So before init_model allocates the 1B model's weights, evict ALL caches that would compete with it for memory.
|
| 238 |
+
import gc
|
| 239 |
+
if size == "1b":
|
| 240 |
+
_MODELS.clear()
|
| 241 |
+
_POSE_MODELS.clear()
|
| 242 |
_ORT_SESSIONS.clear()
|
|
|
|
| 243 |
gc.collect()
|
| 244 |
+
else:
|
| 245 |
+
while len(_MODELS) >= _MAX_CACHED:
|
| 246 |
+
oldest = next(iter(_MODELS))
|
| 247 |
+
del _MODELS[oldest]
|
| 248 |
+
gc.collect()
|
| 249 |
|
| 250 |
model = init_model(config, ckpt, device="cpu")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
_MODELS[key] = model
|
| 252 |
return model
|
| 253 |
|
|
|
|
| 282 |
os.makedirs(local_dir, exist_ok=True)
|
| 283 |
ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
|
| 284 |
|
| 285 |
+
# For 1B, same hard eviction as the dense 1B path: clear every other resident model before init_model allocates. For other sizes, clear only the pose cache.
|
| 286 |
+
import gc
|
| 287 |
+
if size == "1b":
|
| 288 |
+
_MODELS.clear()
|
| 289 |
+
_POSE_MODELS.clear()
|
| 290 |
_ORT_SESSIONS.clear()
|
| 291 |
+
else:
|
| 292 |
+
_POSE_MODELS.clear() # cap=1
|
| 293 |
+
gc.collect()
|
| 294 |
|
| 295 |
model = init_model(config, ckpt, device="cpu")
|
| 296 |
|
|
|
|
| 299 |
model.codec = UDPHeatmap(**codec_cfg)
|
| 300 |
model.pose_metainfo = _get_pose_metainfo()
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
_POSE_MODELS[key] = model
|
| 303 |
return model
|
| 304 |
|