| | |
| |
|
| | import os |
| | import time |
| | import traceback |
| | import requests |
| | import pandas as pd |
| | import gradio as gr |
| |
|
| | |
# --- Configuration (overridable via environment variables / Space secrets) ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID", "framsouza/Final_Assignment_Template")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Fail fast at import time: the token is needed for whoami + inference calls,
# and SPACE_ID is embedded in the submission payload.
if not HF_TOKEN or not SPACE_ID:
    raise RuntimeError(
        "❌ Please set both SPACE_ID and HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )

# Bearer-token header shared by the whoami lookup and the Inference API calls.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Markdown banner shown at the top of the Gradio UI.
WELCOME = """
## GAIA Benchmark Runner 🚀

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""
| |
|
| | |
class GAIAAgent:
    """Thin agent that answers questions via the HF text-generation Inference API."""

    def __init__(self, model_id: str):
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")
        self.model_id = model_id
        self.headers = HEADERS  # module-level bearer-token header

    def answer(self, prompt: str) -> str:
        """Send *prompt* to the hosted model and return the generated text.

        Raises:
            requests.HTTPError: if the Inference API responds with an error status.
        """
        endpoint = f"https://api-inference.huggingface.co/models/{self.model_id}"
        body = {
            "inputs": prompt,
            "parameters": {"max_new_tokens": 512, "temperature": 0.2},
        }
        response = requests.post(endpoint, headers=self.headers, json=body, timeout=60)
        response.raise_for_status()
        result = response.json()
        # Text-generation responses arrive as [{"generated_text": ...}];
        # anything else is stringified as a fallback.
        if isinstance(result, list) and result and "generated_text" in result[0]:
            return result[0]["generated_text"].strip()
        return str(result)
| |
|
| | |
def run_and_submit_all():
    """Run the agent on every GAIA question and submit the answers for scoring.

    Returns:
        tuple[str, pd.DataFrame]: a Markdown status message and a table of
        (Task ID, Question, Answer) rows; on failure the DataFrame is empty.
    """
    try:
        # 1) Resolve the HF username tied to the token (required by /submit).
        who = requests.get("https://huggingface.co/api/whoami-v2", headers=HEADERS, timeout=10)
        who.raise_for_status()
        username = who.json().get("user", {}).get("username")
        if not username:
            return "❌ Could not fetch your HF username. Check your token.", pd.DataFrame()

        # 2) Pull the question set from the scoring service.
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "❌ No questions returned; check your API_URL.", pd.DataFrame()

        # 3) Answer every question. A single model failure becomes an
        #    "ERROR: ..." answer instead of aborting the whole run.
        agent = GAIAAgent(MODEL_ID)
        results = []
        payload = []
        for task in questions:
            tid = task["task_id"]
            q = task.get("question", "")
            try:
                ans = agent.answer(q)
            except Exception as e:
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": q, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)  # gentle pacing between Inference API calls

        # 4) Submit all answers in one request.
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
            "answers": payload,
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()

        # 5) Format the score report for the UI.
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        # Top-level boundary: log the full traceback, surface a short message.
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return f"❌ Unexpected error:\n{e}\n\nSee logs for details.", pd.DataFrame()
| |
|
| | |
# --- Gradio UI: one button that runs the full benchmark and shows results ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()  # submission status / score report
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # No inputs: the handler reads everything from module-level config.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status, table_df]
    )

if __name__ == "__main__":
    demo.launch()
| |
|