Wendy-Fly committed on
Commit
bddfa08
·
verified ·
1 Parent(s): 58b909f

add LLM (gemini/gpt) prediction columns to comparison

Browse files
Files changed (1) hide show
  1. embedding_transform_eval.py +22 -0
embedding_transform_eval.py CHANGED
@@ -161,6 +161,28 @@ def main():
161
  # --- knn average top-100 ruler score ---
162
  methods["kNN-100 mean(ruler_score)"] = top_scores.mean(axis=1)
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  # ---- 评分输出 ----
165
  print(f"\n{'method':<40}{'best F1':>10}{'thr':>10}{'P':>9}{'R':>9}{'AUC?':>10}")
166
  print("-" * 88)
 
161
  # --- knn average top-100 ruler score ---
162
  methods["kNN-100 mean(ruler_score)"] = top_scores.mean(axis=1)
163
 
164
+ # --- LLM 列(如果 csv 里带了 AIPF 跑出来的位置/score)---
165
+ BOUNDARY_SCORE_DEFAULT = 44.72
166
+ llm_cols = [
167
+ ("score_gemini_2.5_flash", None), # 已经是 score,越大越严
168
+ ("position_gemini_2.5_flash", "neg"), # position 越小越严,取负
169
+ ("score_gpt_4.1", None),
170
+ ("position_gpt_4.1", "neg"),
171
+ ]
172
+ for col, mode in llm_cols:
173
+ if col not in df.columns:
174
+ continue
175
+ raw = pd.to_numeric(df[col], errors="coerce").values
176
+ # NaN 用列中位数填,避免阈值扫描出问题
177
+ med = np.nanmedian(raw)
178
+ if np.isnan(med):
179
+ continue
180
+ raw = np.where(np.isnan(raw), med, raw)
181
+ if mode == "neg":
182
+ methods[f"LLM: {col} (-position)"] = -raw
183
+ else:
184
+ methods[f"LLM: {col}"] = raw
185
+
186
  # ---- 评分输出 ----
187
  print(f"\n{'method':<40}{'best F1':>10}{'thr':>10}{'P':>9}{'R':>9}{'AUC?':>10}")
188
  print("-" * 88)