Spaces:

WeReCooking
/

sapiens2-cpu

Running

App Files Community

Nekochu committed 8 days ago

Commit

86547e5

1 Parent(s): 807e510

Hard-evict caches when loading 1B variants

Browse files

Files changed (1) hide show

app.py +19 -19

app.py CHANGED Viewed

@@ -233,19 +233,21 @@ def _get_dense_model(task: str, size: str):
     os.makedirs(local_dir, exist_ok=True)
     ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
-    # If a 5B ORT session (5-6 GB) is resident, evict it before init_model allocates the 1B dense weights.
-    if size == "1b" and _ORT_SESSIONS:
         _ORT_SESSIONS.clear()
-        import gc
         gc.collect()
     model = init_model(config, ckpt, device="cpu")
-    while len(_MODELS) >= _MAX_CACHED:
-        oldest = next(iter(_MODELS))
-        del _MODELS[oldest]
-        import gc
-        gc.collect()
     _MODELS[key] = model
     return model
@@ -280,11 +282,15 @@ def _get_pose_model(size: str):
     os.makedirs(local_dir, exist_ok=True)
     ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
-    # Drop a resident 5B ORT session before allocating the 1B pose weights.
-    if size == "1b" and _ORT_SESSIONS:
         _ORT_SESSIONS.clear()
-        import gc
-        gc.collect()
     model = init_model(config, ckpt, device="cpu")
@@ -293,12 +299,6 @@ def _get_pose_model(size: str):
     model.codec = UDPHeatmap(**codec_cfg)
     model.pose_metainfo = _get_pose_metainfo()
-    # Free the largest cached dense model first if more than one pose model present.
-    while len(_POSE_MODELS) >= 1:
-        oldest = next(iter(_POSE_MODELS))
-        del _POSE_MODELS[oldest]
-        import gc
-        gc.collect()
     _POSE_MODELS[key] = model
     return model

     os.makedirs(local_dir, exist_ok=True)
     ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
+    # cpu-basic has 16 GB. Loading a 1B dense (~6 GB fp32) on top of cached 0.8b/0.4b dense (~5 GB each) + a 1B pose + DETR OOMs.
+    # So before init_model allocates a 1B's weights, evict ALL caches it would race with.
+    import gc
+    if size == "1b":
+        _MODELS.clear()
+        _POSE_MODELS.clear()
         _ORT_SESSIONS.clear()
         gc.collect()
+    else:
+        while len(_MODELS) >= _MAX_CACHED:
+            oldest = next(iter(_MODELS))
+            del _MODELS[oldest]
+            gc.collect()
     model = init_model(config, ckpt, device="cpu")
     _MODELS[key] = model
     return model
     os.makedirs(local_dir, exist_ok=True)
     ckpt = hf_hub_download(repo_id=spec["repo"], filename=spec["filename"], local_dir=local_dir)
+    # Same hard eviction as the dense 1B path: clear every other resident model before init_model allocates.
+    import gc
+    if size == "1b":
+        _MODELS.clear()
+        _POSE_MODELS.clear()
         _ORT_SESSIONS.clear()
+    else:
+        _POSE_MODELS.clear()  # cap=1
+    gc.collect()
     model = init_model(config, ckpt, device="cpu")
     model.codec = UDPHeatmap(**codec_cfg)
     model.pose_metainfo = _get_pose_metainfo()
     _POSE_MODELS[key] = model
     return model