Wendy-Fly
/

Sound

Wendy-Fly committed on 19 days ago

Commit

2c780f9

verified ·

1 Parent(s): 500dff7

default output to AIPF csv path (in-place overwrite, no cp needed)

Files changed (1) hide show

add_estimated_position.py CHANGED Viewed

@@ -2,19 +2,21 @@
 """把 embedding top-K match 估出来的整数 position 写回 golden_set.csv。
 默认列名固定为 estimated_position（下游 AIPF warm-start 代码读这个名）。
-每次跑都覆盖这一列，用 --k 切换不同邻居数即可。
-用法：
-  # top-100 邻居均值（默认 K）
-  python3 add_estimated_position.py --k 100
-  # top-5 邻居均值
   python3 add_estimated_position.py --k 5
-  # top-1（最近邻 rank）
   python3 add_estimated_position.py --k 1
-可选：想同时存多列对比（不覆盖），手动指定列名：
   python3 add_estimated_position.py --k 5 --new-col estimated_position_top5
 """
 import argparse
@@ -28,7 +30,8 @@ import pandas as pd
 DEFAULTS = dict(
     csv    = "/mnt/bn/tns-algo-ue-my/biaowu/aipf_dm_metric/example/yss_ruler_eval/data/aipf_golden_set.csv",
     jsonl  = "golden_top100.jsonl",
-    output = "golden_set_with_estimated.csv",
     id_col = "task_id",
 )

 """把 embedding top-K match 估出来的整数 position 写回 golden_set.csv。
 默认列名固定为 estimated_position（下游 AIPF warm-start 代码读这个名）。
+默认输入/输出都指向 AIPF 流水线实际读取的那个 csv，**直接原地覆盖**，
+省掉 `cp` 到 example/yss_ruler_eval/data/ 这一步。
+每次跑都覆盖 estimated_position 列；用 --k 切换不同邻居数即可，
+其它列保持不变。
+用法：
+  python3 add_estimated_position.py --k 100   # 默认 K
   python3 add_estimated_position.py --k 5
   python3 add_estimated_position.py --k 1
+⚠️ 第一次跑前建议备份原 csv：
+  cp /mnt/.../aipf_golden_set.csv /mnt/.../aipf_golden_set.csv.bak
+可选：想同时存多列对比（不覆盖默认列），手动指定列名：
   python3 add_estimated_position.py --k 5 --new-col estimated_position_top5
 """
 import argparse
 DEFAULTS = dict(
     csv    = "/mnt/bn/tns-algo-ue-my/biaowu/aipf_dm_metric/example/yss_ruler_eval/data/aipf_golden_set.csv",
     jsonl  = "golden_top100.jsonl",
+    # 输出直接覆盖 AIPF 流水线实际读取的那个 csv，省掉 cp 那一步
+    output = "/mnt/bn/tns-algo-ue-my/biaowu/aipf_dm_metric/example/yss_ruler_eval/data/aipf_golden_set.csv",
     id_col = "task_id",
 )