| | import json |
| | import os |
| | import gradio as gr |
| |
|
| | |
# Parsed JSON content of each dataset file, keyed by difficulty label.
datasets = {}

# Difficulty label (also used as the dropdown choices) -> JSON file path.
difficulty_files = {
    "Easy": "data/scalar_easy.json",
    "Medium": "data/scalar_medium.json",
    "Hard": "data/scalar_hard.json",
}

# Load every dataset eagerly at import time; a missing or malformed file
# raises here, before any UI is built.
for difficulty, filepath in difficulty_files.items():
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which is not UTF-8 everywhere (e.g. Windows) and would fail or
    # mis-decode non-ASCII text in the data.
    with open(filepath, encoding="utf-8") as f:
        datasets[difficulty] = json.load(f)

# Navigation state read and rebound by update_display / prev_item / next_item.
# NOTE(review): these are module globals, so presumably every browser session
# served by this process shares one position - confirm whether per-session
# state (gr.State) is wanted.
current_index = 0
current_difficulty = "Easy"
current_data = datasets[current_difficulty]
| |
|
| | |
def update_display(index, difficulty=None):
    """Return the display values for one dataset item and record it as current.

    Args:
        index: Position of the item to show within the active dataset.
            Values outside the dataset are clamped to the nearest valid
            position.
        difficulty: Key into ``datasets``. When given and different from the
            active difficulty, the active dataset is switched and the
            position is reset to 0, ignoring ``index``.

    Returns:
        A 5-tuple ``(question_html, citation_json, reference_id, label_id,
        index)`` where ``index`` is the position that is actually displayed.

    Raises:
        KeyError: If ``difficulty`` is not a key of ``datasets``, or the item
            lacks one of the expected fields.
        IndexError: If the active dataset is empty.
    """
    global current_index, current_difficulty, current_data

    # Switching difficulty always restarts at the first item: the previous
    # position has no meaning in a different dataset.
    if difficulty is not None and difficulty != current_difficulty:
        current_difficulty = difficulty
        current_data = datasets[difficulty]
        index = 0

    # Keep the position inside the dataset so a stale or out-of-range slider
    # value shows the nearest item instead of raising IndexError.
    index = max(0, min(int(index), len(current_data) - 1))
    current_index = index
    item = current_data[current_index]

    # Escape angle brackets so text such as "<tag>" in the question is shown
    # literally rather than being interpreted as HTML by the renderer.
    question = item["question"].replace("<", "&lt;").replace(">", "&gt;")

    # Highlight the masked-citation placeholder. This runs after escaping so
    # the inserted <span> markup itself is not escaped.
    colored_question = question.replace(
        "[MASKED_CITATION]",
        '<span style="background-color: #ffebeb; padding: 2px 6px; border-radius: 4px; color: blue;">[MASKED_CITATION]</span>',
    )

    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    formatted_citation = json.dumps(item["citation"], indent=4, ensure_ascii=False)
    return colored_question, formatted_citation, item["reference_id"], item["label_id"], index
| |
|
def prev_item(difficulty):
    """Step back one item in the dataset for ``difficulty``.

    Stepping back from the first item wraps around to the last one. The
    result is whatever ``update_display`` returns for the new position.
    """
    global current_index, current_data

    records = datasets[difficulty]
    current_data = records
    # Modulo makes position 0 wrap to the final record.
    step_back = current_index - 1
    current_index = step_back % len(records)
    return update_display(current_index, difficulty)
| |
|
def next_item(difficulty):
    """Step forward one item in the dataset for ``difficulty``.

    Stepping forward from the last item wraps around to the first one. The
    result is whatever ``update_display`` returns for the new position.
    """
    global current_index, current_data

    records = datasets[difficulty]
    current_data = records
    # Modulo makes the final record wrap to position 0.
    step_forward = current_index + 1
    current_index = step_forward % len(records)
    return update_display(current_index, difficulty)
| |
|
| | |
# Build the viewer: controls and citation details side by side, with the
# question rendered underneath.
with gr.Blocks() as demo:
    gr.Markdown("""
    ## Data viewer for [SCALAR](https://arxiv.org/abs/2502.13753/)
    * SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning) is a novel benchmark that leverages academic papers and their citation networks, featuring automatic generation of high-quality ground truth labels without human annotation, controllable difficulty levels, and a dynamic updating mechanism that prevents data contamination.
    * The benchmark provides a reliable and sustainable way to track progress in long-context understanding as LLM capabilities evolve.
    """)
    with gr.Column():
        with gr.Row():
            # Left column: navigation controls.
            with gr.Column():
                difficulty_dropdown = gr.Dropdown(
                    choices=list(difficulty_files.keys()),
                    value="Easy",
                    label="Difficulty Level",
                )
                index_slider = gr.Slider(
                    minimum=0,
                    maximum=len(datasets["Easy"]) - 1,
                    step=1,
                    label="Select Index",
                    value=0,
                )
                with gr.Row():
                    prev_btn = gr.Button("Previous")
                    next_btn = gr.Button("Next")

            # Right column: read-only details of the selected item.
            with gr.Column():
                citation_text = gr.TextArea(label="Citation", interactive=False)
                reference_id = gr.Textbox(label="Reference ID", interactive=False)
                label_id = gr.Textbox(label="Label ID", interactive=False)

        with gr.Row():
            question_md = gr.Markdown(label="Question")

    # Every display refresh reads the same inputs and fills the same outputs,
    # in the order update_display returns them.
    display_inputs = [index_slider, difficulty_dropdown]
    display_outputs = [question_md, citation_text, reference_id, label_id, index_slider]

    # Changing difficulty resizes the slider to the new dataset and resets it
    # to 0. The display is then refreshed explicitly: when the slider was
    # already at 0 its value does not change, so relying on the slider's own
    # change event alone could leave the previous difficulty's item on screen.
    difficulty_dropdown.change(
        fn=lambda d: gr.Slider(minimum=0, maximum=len(datasets[d]) - 1, step=1, value=0),
        inputs=[difficulty_dropdown],
        outputs=[index_slider],
    ).then(
        update_display,
        inputs=display_inputs,
        outputs=display_outputs,
    )

    index_slider.change(
        update_display,
        inputs=display_inputs,
        outputs=display_outputs,
    )

    prev_btn.click(
        prev_item,
        inputs=[difficulty_dropdown],
        outputs=display_outputs,
    )

    next_btn.click(
        next_item,
        inputs=[difficulty_dropdown],
        outputs=display_outputs,
    )

    # Populate the view with the first item as soon as the page loads.
    demo.load(
        update_display,
        inputs=display_inputs,
        outputs=display_outputs,
    )

demo.launch()
| |
|