Spaces:

Sbhat2026
/

protfunc

Sleeping

App Files Community

Sbhat2026 committed on Apr 10

Commit

b91bbd2

verified ·

1 Parent(s): 621deb3

fix: revert to improved model, add top-30 prediction cap

Browse files

Files changed (1) hide show

server.py +21 -8

server.py CHANGED Viewed

@@ -215,8 +215,8 @@ def load_go_map():
 def load_thresholds():
     for path in [
-        os.path.join(BASE_DIR, "protfunc_v3_thresholds.json"),
         os.path.join(BASE_DIR, "improved_per_label_thresholds.json"),
         os.path.join(BASE_DIR, "per_label_thresholds.json"),
         os.path.join(BASE_DIR, "artifacts", "per_label_thresholds.json"),
     ]:
@@ -518,10 +518,12 @@ async def lifespan(app: FastAPI):
     import numpy as np
     device = torch.device("cpu")
-    # Prefer checkpoints in priority order: v3 > improved > baseline
     ckpt_candidates = [
-        os.path.join(BASE_DIR, "protfunc_v3.pth"),
         os.path.join(BASE_DIR, "improved_res.pth"),
         os.path.join(BASE_DIR, "baseline_res.pth"),
     ]
     ckpt_path = next((p for p in ckpt_candidates if os.path.exists(p)), None)
@@ -589,16 +591,18 @@ async def root():
 @app.get("/api/model/info")
 async def model_info():
     """Return model metadata and configuration."""
-    v3      = os.path.exists(os.path.join(BASE_DIR, "protfunc_v3.pth"))
     improved = os.path.exists(os.path.join(BASE_DIR, "improved_res.pth"))
-    if v3:
-        name, version = "ProtFunc v3 (supplemented + mammal)", "3.0.0"
     elif improved:
-        name, version = "ProtFunc Enhanced", "2.1.0"
     else:
-        name, version = "ProtFunc", "2.0.0"
     return {
         "model_name":            name,
         "version":               version,
         "esm_model":             "esm2_t6_8M_UR50D",
         "embed_dim":             320,
@@ -749,6 +753,12 @@ async def predict(request: ProteinRequest):
                     })
                     prob_map[mlb.classes_[i]] = pv
             raw_preds.sort(key=lambda x: x["prob"], reverse=True)
             # Propagate up GO DAG and filter
             visible, suppressed = propagate_and_filter(
@@ -821,6 +831,9 @@ async def predict_batch(request: BatchPredictRequest):
                     })
                     prob_map[mlb.classes_[i]] = pv
             raw_preds.sort(key=lambda x: x["prob"], reverse=True)
             visible, suppressed = propagate_and_filter(
                 raw_preds, go_parents, go_ancestors, prob_map

 def load_thresholds():
     for path in [
         os.path.join(BASE_DIR, "improved_per_label_thresholds.json"),
+        os.path.join(BASE_DIR, "protfunc_v3_thresholds.json"),
         os.path.join(BASE_DIR, "per_label_thresholds.json"),
         os.path.join(BASE_DIR, "artifacts", "per_label_thresholds.json"),
     ]:
     import numpy as np
     device = torch.device("cpu")
+    # Prefer checkpoints in priority order: improved > v3 > baseline
+    # (v3 was trained with propagated training labels which inflates predictions;
+    #  improved_res.pth has verified Fmax=0.8846 on specific GO annotations)
     ckpt_candidates = [
         os.path.join(BASE_DIR, "improved_res.pth"),
+        os.path.join(BASE_DIR, "protfunc_v3.pth"),
         os.path.join(BASE_DIR, "baseline_res.pth"),
     ]
     ckpt_path = next((p for p in ckpt_candidates if os.path.exists(p)), None)
 @app.get("/api/model/info")
 async def model_info():
     """Return model metadata and configuration."""
     improved = os.path.exists(os.path.join(BASE_DIR, "improved_res.pth"))
+    v3       = os.path.exists(os.path.join(BASE_DIR, "protfunc_v3.pth"))
+    # model name reflects actual loaded model (improved takes priority)
+    if model_uses_supp:
+        name, version, active = "ProtFunc v3 (supplemented + mammal)", "3.0.0", "protfunc_v3"
     elif improved:
+        name, version, active = "ProtFunc Enhanced", "2.1.0", "improved"
     else:
+        name, version, active = "ProtFunc", "2.0.0", "baseline"
     return {
         "model_name":            name,
+        "model":                 active,
         "version":               version,
         "esm_model":             "esm2_t6_8M_UR50D",
         "embed_dim":             320,
                     })
                     prob_map[mlb.classes_[i]] = pv
             raw_preds.sort(key=lambda x: x["prob"], reverse=True)
+            # Cap to top 30 most confident direct predictions before propagation.
+            # Without this, models trained on propagated labels return hundreds of
+            # broad ancestor terms that overwhelm the signal.
+            raw_preds = raw_preds[:30]
+            for rp in raw_preds:
+                prob_map[rp["go_id"]] = rp["prob"]
             # Propagate up GO DAG and filter
             visible, suppressed = propagate_and_filter(
                     })
                     prob_map[mlb.classes_[i]] = pv
             raw_preds.sort(key=lambda x: x["prob"], reverse=True)
+            raw_preds = raw_preds[:30]
+            for rp in raw_preds:
+                prob_map[rp["go_id"]] = rp["prob"]
             visible, suppressed = propagate_and_filter(
                 raw_preds, go_parents, go_ancestors, prob_map