| import pandas as pd |
|
|
| from src.display.formatting import make_clickable_model |
|
|
|
|
def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new DataFrame whose column labels are normalized.

    Each label is stripped of surrounding whitespace and lower-cased. Labels
    present in the explicit mapping below receive their mapped name (note that
    a bare ``"tier i"`` header becomes ``"tier_i_f1"``); any other label just
    has its spaces replaced with underscores.

    Non-string labels (for example integer labels) are converted with
    ``str()`` before cleaning, so they no longer raise ``AttributeError``.

    Args:
        df: Frame whose columns should be renamed. It is not modified.

    Returns:
        The result of ``df.rename`` with the normalized column names.
    """
    # Keys are the already-cleaned (stripped, lower-cased) header spellings.
    rename_map = {
        "model type": "model_type",
        "overall f1": "overall_f1",
        "overall accuracy": "overall_accuracy",
        "accuracy": "accuracy",
        "tier i": "tier_i_f1",
        "tier ii": "tier_ii_f1",
        "tier iii": "tier_iii_f1",
        "tier i accuracy": "tier_i_accuracy",
        "tier ii accuracy": "tier_ii_accuracy",
        "tier iii accuracy": "tier_iii_accuracy",
        "start time": "start_time",
        "end time": "end_time",
    }

    def _target_name(label) -> str:
        # str() guards against non-string labels, which have no .strip().
        cleaned = str(label).strip().lower()
        return rename_map.get(cleaned, cleaned.replace(" ", "_"))

    # Map each original label (used as the lookup key by rename) to its
    # normalized name.
    return df.rename(columns={col: _target_name(col) for col in df.columns})
|
|
|
|
def get_leaderboard_df(
    results_path: str,
    _requests_path: str,
    _cols: list,
    _benchmark_cols: list,
    sort_by: str = "overall_f1",
) -> pd.DataFrame:
    """Build the leaderboard table from a static CSV file.

    The CSV at ``results_path`` is read, its headers are passed through
    ``_normalize_columns``, the ``model`` column (when present) is run through
    ``make_clickable_model``, and the rows are ordered by ``sort_by`` in
    descending order when that column exists.

    Args:
        results_path: Path of the CSV file to load.
        _requests_path: Not used by this function.
        _cols: Not used by this function.
        _benchmark_cols: Not used by this function.
        sort_by: Normalized column name to sort on; ignored if the column is
            absent.

    Returns:
        The loaded (and, where possible, sorted) DataFrame.
    """
    table = _normalize_columns(pd.read_csv(results_path))

    if "model" in table.columns:
        table["model"] = table["model"].apply(make_clickable_model)

    # Without the requested sort column, hand the rows back in file order.
    if sort_by not in table.columns:
        return table

    return table.sort_values(by=[sort_by], ascending=False)
|
|
|
|
def get_evaluation_queue_df(
    save_path: str, cols: list
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Create empty dataframes for the evaluation queues.

    The leaderboard uses static data, so the queues are always empty.

    Args:
        save_path: Not used by this function.
        cols: Column labels given to each empty frame.

    Returns:
        A 3-tuple of empty DataFrames, each with ``cols`` as its columns.
        Each element is a separate object, so modifying one does not affect
        the others.
    """
    # Build a fresh frame per slot instead of returning one shared object
    # three times, which would alias all three queues to the same data.
    return (
        pd.DataFrame(columns=cols),
        pd.DataFrame(columns=cols),
        pd.DataFrame(columns=cols),
    )
|
|