import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    CATEGORY_ACCURACY_COLS,
    CATEGORY_F1_COLS,
    OVERALL_TIER_COLS,
    CategoryAccuracyColumn,
    CategoryF1Column,
    OverallTierColumn,
    fields,
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_leaderboard_df


def restart_space():
    """Restart the Hugging Face Space so newly uploaded results are picked up."""
    API.restart_space(repo_id=REPO_ID)


# Load the three leaderboard views from the evaluation results CSVs.
OVERALL_TIER_LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH + "/ARFBench_leaderboard.csv",
    EVAL_REQUESTS_PATH,
    OVERALL_TIER_COLS,
    OVERALL_TIER_COLS,
    sort_by="accuracy",
)
CATEGORY_F1_LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH + "/ARFBench_leaderboard_category_f1.csv",
    EVAL_REQUESTS_PATH,
    CATEGORY_F1_COLS,
    CATEGORY_F1_COLS,
    sort_by="overall_f1",
)
CATEGORY_ACCURACY_LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH + "/ARFBench_leaderboard_category_accuracy.csv",
    EVAL_REQUESTS_PATH,
    CATEGORY_ACCURACY_COLS,
    CATEGORY_ACCURACY_COLS,
    sort_by="overall_accuracy",
)


def init_custom_leaderboard(dataframe, column_class, filter_column_name, filter_label):
    """Build a Leaderboard component for one view of the benchmark results."""
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(column_class)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(column_class) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(column_class) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[column_class.model.name],
        hide_columns=[c.name for c in fields(column_class) if c.hidden],
        filter_columns=[
            ColumnFilter(
                filter_column_name,
                type="slider",
                min=0,
                max=100,
                label=filter_label,
            ),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 ARFBench Leaderboard", elem_id="arfbench-tab-table", id=0):
            with gr.Tabs(selected=0):
                with gr.TabItem("Overall + Tier (Default)", id=0):
                    leaderboard_overall_tier = init_custom_leaderboard(
                        OVERALL_TIER_LEADERBOARD_DF,
                        OverallTierColumn,
                        OverallTierColumn.overall_f1.name,
                        "Overall F1 score",
                    )
                with gr.TabItem("Per-Category F1", id=1):
                    leaderboard_category_f1 = init_custom_leaderboard(
                        CATEGORY_F1_LEADERBOARD_DF,
                        CategoryF1Column,
                        CategoryF1Column.overall_f1.name,
                        "Overall F1 score",
                    )
                with gr.TabItem("Per-Category Accuracy", id=2):
                    leaderboard_category_accuracy = init_custom_leaderboard(
                        CATEGORY_ACCURACY_LEADERBOARD_DF,
                        CategoryAccuracyColumn,
                        CategoryAccuracyColumn.overall_accuracy.name,
                        "Overall Accuracy score",
                    )

        with gr.TabItem("📝 About", elem_id="about-tab-table", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

# No background scheduler is started; restart_space() is available for manual refreshes.
scheduler = None
demo.queue(default_concurrency_limit=40)
if __name__ == "__main__":
    demo.launch()
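
# NOTE: a minimal sketch of how restart_space() could be wired to a periodic
# refresh using APScheduler (an assumption -- this project may refresh results
# by other means). If used, it belongs before demo.queue()/demo.launch():
#
#     from apscheduler.schedulers.background import BackgroundScheduler
#
#     scheduler = BackgroundScheduler()
#     scheduler.add_job(restart_space, "interval", seconds=1800)  # restart every 30 minutes
#     scheduler.start()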