Add LLM (Gemini/GPT) prediction columns to the method comparison
Browse files- embedding_transform_eval.py +22 -0
embedding_transform_eval.py
CHANGED
|
@@ -161,6 +161,28 @@ def main():
|
|
| 161 |
# --- knn average top-100 ruler score ---
|
| 162 |
methods["kNN-100 mean(ruler_score)"] = top_scores.mean(axis=1)
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
# ---- 评分输出 ----
|
| 165 |
print(f"\n{'method':<40}{'best F1':>10}{'thr':>10}{'P':>9}{'R':>9}{'AUC?':>10}")
|
| 166 |
print("-" * 88)
|
|
|
|
| 161 |
# --- knn average top-100 ruler score ---
|
| 162 |
methods["kNN-100 mean(ruler_score)"] = top_scores.mean(axis=1)
|
| 163 |
|
| 164 |
+
# --- LLM columns (only if the csv includes the positions/scores produced by AIPF) ---
|
| 165 |
+
BOUNDARY_SCORE_DEFAULT = 44.72
|
| 166 |
+
llm_cols = [
|
| 167 |
+
("score_gemini_2.5_flash", None), # 已经是 score,越大越严
|
| 168 |
+
("position_gemini_2.5_flash", "neg"), # position 越小越严,取负
|
| 169 |
+
("score_gpt_4.1", None),
|
| 170 |
+
("position_gpt_4.1", "neg"),
|
| 171 |
+
]
|
| 172 |
+
for col, mode in llm_cols:
|
| 173 |
+
if col not in df.columns:
|
| 174 |
+
continue
|
| 175 |
+
raw = pd.to_numeric(df[col], errors="coerce").values
|
| 176 |
+
# Fill NaN with the column median so the threshold sweep does not break
|
| 177 |
+
med = np.nanmedian(raw)
|
| 178 |
+
if np.isnan(med):
|
| 179 |
+
continue
|
| 180 |
+
raw = np.where(np.isnan(raw), med, raw)
|
| 181 |
+
if mode == "neg":
|
| 182 |
+
methods[f"LLM: {col} (-position)"] = -raw
|
| 183 |
+
else:
|
| 184 |
+
methods[f"LLM: {col}"] = raw
|
| 185 |
+
|
| 186 |
# ---- 评分输出 ----
|
| 187 |
print(f"\n{'method':<40}{'best F1':>10}{'thr':>10}{'P':>9}{'R':>9}{'AUC?':>10}")
|
| 188 |
print("-" * 88)
|