Wendy-Fly
/

Sound

Wendy-Fly committed 24 days ago

Commit

bddfa08

verified ·

1 Parent(s): 58b909f

add LLM (gemini/gpt) prediction columns to comparison

Files changed (1) hide show

embedding_transform_eval.py CHANGED Viewed

@@ -161,6 +161,28 @@ def main():
     # --- knn average top-100 ruler score ---
     methods["kNN-100 mean(ruler_score)"] = top_scores.mean(axis=1)
     # ---- 评分输出 ----
     print(f"\n{'method':<40}{'best F1':>10}{'thr':>10}{'P':>9}{'R':>9}{'AUC?':>10}")
     print("-" * 88)

     # --- knn average top-100 ruler score ---
     methods["kNN-100 mean(ruler_score)"] = top_scores.mean(axis=1)
+    # --- LLM 列（如果 csv 里带了 AIPF 跑出来的位置/score）---
+    BOUNDARY_SCORE_DEFAULT = 44.72
+    llm_cols = [
+        ("score_gemini_2.5_flash",     None),       # 已经是 score，越大越严
+        ("position_gemini_2.5_flash",  "neg"),      # position 越小越严，取负
+        ("score_gpt_4.1",              None),
+        ("position_gpt_4.1",           "neg"),
+    ]
+    for col, mode in llm_cols:
+        if col not in df.columns:
+            continue
+        raw = pd.to_numeric(df[col], errors="coerce").values
+        # NaN 用列中位数填，避免阈值扫描出问题
+        med = np.nanmedian(raw)
+        if np.isnan(med):
+            continue
+        raw = np.where(np.isnan(raw), med, raw)
+        if mode == "neg":
+            methods[f"LLM: {col} (-position)"] = -raw
+        else:
+            methods[f"LLM: {col}"] = raw
     # ---- 评分输出 ----
     print(f"\n{'method':<40}{'best F1':>10}{'thr':>10}{'P':>9}{'R':>9}{'AUC?':>10}")
     print("-" * 88)