Spaces:
Sleeping
Sleeping
File size: 8,113 Bytes
10e9b7d eccf8e4 3c4371f d81521e 1056032 10e9b7d e80aab9 3db6293 e80aab9 d81521e d7b35af 1056032 7d75971 1056032 d81521e 1056032 d7b35af 7d75971 d7b35af 7d75971 d7b35af d81521e 1056032 7d75971 d7b35af d81521e 31243f4 d81521e d7b35af 7d75971 31243f4 7d75971 d7b35af 7d75971 1056032 7d75971 d7b35af 7d75971 1056032 f1ec76c 1056032 7d75971 f1ec76c b90251f d81521e 7d75971 3c4371f d7b35af d81521e 3c4371f d81521e e80aab9 d81521e 31243f4 d81521e d7b35af 1056032 d7b35af 7d75971 d81521e 7d75971 3c4371f 7d65c66 31243f4 eccf8e4 d81521e 31243f4 d7b35af 31243f4 d7b35af e80aab9 d81521e 3c4371f d7b35af d81521e f1ec76c d81521e 31243f4 1056032 7d65c66 31243f4 d7b35af 7d75971 f1ec76c 31243f4 d81521e 31243f4 7d75971 7d65c66 d7b35af e80aab9 7d65c66 e80aab9 f1ec76c e80aab9 d81521e e80aab9 d7b35af 7d75971 d81521e d7b35af e80aab9 f1ec76c e80aab9 d7b35af 1056032 d7b35af 1056032 d7b35af f1ec76c 31243f4 9088b99 7d65c66 e80aab9 f1ec76c e80aab9 3c4371f 1056032 d7b35af 1056032 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | import os
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
import traceback
from typing import Optional # Keep this import, good practice
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Agent Integration ---
# Load state for the agent entry point. These are updated by the guarded
# import below and read by the rest of the module.
AGENT_AVAILABLE = False
AGENT_LOAD_ERROR = ""
AGENT_FUNCTION_NAME = "run_gaia_task"  # Define the target function name

try:
    from final_agent import run_gaia_task
except ImportError as e:
    error_msg = f"ERROR: Could not import {AGENT_FUNCTION_NAME} from final_agent.py: {e}"
    print(error_msg)
    # An ImportError can originate from a dependency imported inside
    # final_agent.py; the traceback shows which import actually failed.
    traceback.print_exc()
    AGENT_LOAD_ERROR = error_msg
except Exception as e:
    # Anything else raised while final_agent.py executes its module-level code.
    error_msg = f"ERROR during import or initial setup in final_agent.py: {e}"
    print(error_msg)
    traceback.print_exc()
    AGENT_LOAD_ERROR = error_msg
else:
    print(f"Successfully imported {AGENT_FUNCTION_NAME} from final_agent.py")
    AGENT_AVAILABLE = True

if not AGENT_AVAILABLE:

    def run_gaia_task(task_description: str) -> str:
        """Dummy function used when the real agent fails to load.

        Args:
            task_description: The task text; ignored by this stand-in.

        Returns:
            An error string that embeds the recorded load error.
        """
        print(f"Executing dummy {AGENT_FUNCTION_NAME} because agent failed to load.")
        return f"ERROR: Agent function '{AGENT_FUNCTION_NAME}' could not be loaded. Details: {AGENT_LOAD_ERROR}"
# --- Agent Runner Class ---
class AgentRunner:
    """Callable adapter that forwards a question to ``run_gaia_task``."""

    def __init__(self):
        """Log construction and warn when the agent function did not load."""
        print("AgentRunner initialized.")
        if AGENT_AVAILABLE:
            return
        print(f"WARNING: Agent function failed to load during startup. Error: {AGENT_LOAD_ERROR}")

    def __call__(self, question: str) -> str:
        """Run the agent function on one question and return its answer as text.

        Any exception raised by the agent function (or while stringifying
        and logging its result) is caught, logged with a traceback, and
        turned into an ``ERROR: ...`` string instead of propagating.
        """
        # Only the first 100 characters are echoed to keep the log readable.
        print(f"\n--- AgentRunner received question: {question[:100]}... ---")
        try:
            answer_text = str(run_gaia_task(task_description=question))
            print(f"--- AgentRunner returning answer: {answer_text} ---")
        except Exception as exc:
            print(f"!!! ERROR calling {AGENT_FUNCTION_NAME} function: {exc} !!!")
            traceback.print_exc()
            return f"ERROR: Agent function '{AGENT_FUNCTION_NAME}' failed during execution - {exc}"
        return answer_text
# --- Submission Logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetches questions, runs agent, submits answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when
            nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with the columns "Task ID", "Question" and
        "Submitted Answer", or ``None`` when the run stops before any
        question is processed (no login, agent not loaded, fetch failure,
        empty question list).
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        print("User not logged in.")
        return "Please Login.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent Runner
    try:
        agent = AgentRunner()
    except Exception as e:
        print(f"Error instantiating AgentRunner: {e}")
        return f"Init error: {e}", None
    if not AGENT_AVAILABLE:
        return f"Agent function '{AGENT_FUNCTION_NAME}' failed to load. Check logs. Error: {AGENT_LOAD_ERROR}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code URL N/A"
    print(f"Agent code reference: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Questions list empty.")
            return "Questions list empty.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Fetch error: {e}", None

    # 3. Run Agent on each question
    results_log = []
    answers_payload = []
    question_count = len(questions_data)
    print(f"Running agent on {question_count} questions...")
    for i, item in enumerate(questions_data):
        # A malformed (non-dict) entry must not abort the whole run.
        if not isinstance(item, dict):
            print(f"Skipping item: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        print(f"\n--- Processing Question {i+1}/{question_count} (ID: {task_id}) ---")
        if not task_id or question_text is None:
            print(f"Skipping item: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            print(f"!! Error running agent on task {task_id}: {e} !!")
            traceback.print_exc()
            # The failure text is still submitted so the task counts as attempted.
            submitted_answer = f"AGENT RUN ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        print("Agent produced no answers.")
        return "Agent produced no answers.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...")

    # 5. Submit
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\nUser: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'N/A')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server error {e.response.status_code}."
        try:
            error_json = e.response.json()
        except ValueError:
            # Every JSON decode error raised by Response.json() is a
            # ValueError subclass, including on requests releases that
            # predate requests.exceptions.JSONDecodeError.
            error_detail += f" Response: {e.response.text[:200]}"
        else:
            # The body may be valid JSON without being an object; an
            # exception raised here would escape the function entirely.
            if isinstance(error_json, dict):
                detail = error_json.get('detail', e.response.text)
            else:
                detail = error_json
            error_detail += f" Detail: {detail}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out (120 seconds)."
    except Exception as e:
        status_message = f"Submission Failed: Unexpected error - {e}"
        traceback.print_exc()

    # Shared exit for every failed submission: still return the per-question log.
    print(status_message)
    return status_message, pd.DataFrame(results_log)
# --- Build Gradio Interface ---
with gr.Blocks() as demo:
    # Page title followed by the usage notes shown above the controls.
    gr.Markdown(value="# GAIA Agent Evaluation Runner")
    gr.Markdown(
        value="""
**Instructions:**
1. Ensure your agent logic is in `final_agent.py` (exposing the `run_gaia_task` function) and dependencies in `requirements.txt`. Set secrets in Space settings (GROQ_API_KEY, TAVILY_API_KEY, OPENAI_API_KEY).
2. Log in to Hugging Face using the button below.
3. Click 'Run Evaluation & Submit All Answers' to run your agent. Check Logs for detailed progress.
---
**Disclaimers:** Execution can take significant time depending on the number of questions and agent complexity.
"""
    )

    # Controls: login, the trigger button, and the two output widgets.
    login_button = gr.LoginButton()
    run_button = gr.Button(value="Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No `inputs` are wired on purpose: the handler's `gr.OAuthProfile | None`
    # parameter is expected to be supplied by Gradio from the login state.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
# --- Main execution block ---
if __name__ == "__main__":
    # Script entry point: print a startup banner, surface any agent load
    # failure recorded at import time, then start the Gradio server.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    if not AGENT_AVAILABLE:
        print(f"CRITICAL WARNING: Agent function '{AGENT_FUNCTION_NAME}' could not be loaded. The app will run but agent calls will fail.")
        print(f"Load Error Details: {AGENT_LOAD_ERROR}")
    print("Launching Gradio Interface...")
    demo.launch(debug=False, share=False)