File size: 8,113 Bytes
10e9b7d
 
eccf8e4
3c4371f
d81521e
 
1056032
10e9b7d
e80aab9
3db6293
e80aab9
d81521e
d7b35af
 
1056032
 
7d75971
1056032
 
d81521e
 
1056032
d7b35af
 
7d75971
d7b35af
 
7d75971
d7b35af
d81521e
 
1056032
 
 
 
7d75971
d7b35af
d81521e
31243f4
d81521e
 
d7b35af
7d75971
31243f4
7d75971
d7b35af
7d75971
1056032
7d75971
d7b35af
7d75971
 
1056032
f1ec76c
1056032
7d75971
f1ec76c
b90251f
d81521e
7d75971
3c4371f
d7b35af
d81521e
3c4371f
d81521e
e80aab9
d81521e
31243f4
d81521e
d7b35af
1056032
d7b35af
7d75971
d81521e
7d75971
3c4371f
7d65c66
31243f4
eccf8e4
d81521e
31243f4
d7b35af
31243f4
d7b35af
e80aab9
d81521e
 
3c4371f
d7b35af
 
d81521e
f1ec76c
d81521e
31243f4
1056032
7d65c66
 
31243f4
d7b35af
7d75971
f1ec76c
31243f4
d81521e
31243f4
7d75971
7d65c66
d7b35af
e80aab9
7d65c66
e80aab9
f1ec76c
e80aab9
d81521e
 
 
 
 
e80aab9
d7b35af
7d75971
d81521e
 
d7b35af
 
 
 
e80aab9
 
f1ec76c
e80aab9
d7b35af
 
 
 
1056032
d7b35af
 
 
1056032
d7b35af
 
f1ec76c
31243f4
9088b99
7d65c66
e80aab9
f1ec76c
 
 
 
 
e80aab9
3c4371f
1056032
 
 
d7b35af
1056032
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
import traceback
from typing import Optional # Keep this import, good practice

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Agent Integration ---
# Status flags for the optional agent import below. They start out describing
# the "not loaded" state and are only flipped when the import succeeds.
AGENT_FUNCTION_NAME = "run_gaia_task"  # Name of the entry point expected in final_agent.py
AGENT_AVAILABLE = False
AGENT_LOAD_ERROR = ""

try:
    from final_agent import run_gaia_task
    print(f"Successfully imported {AGENT_FUNCTION_NAME} from final_agent.py")
    AGENT_AVAILABLE = True
except Exception as e:
    if isinstance(e, ImportError):
        # Module or symbol missing: a short message is enough, no traceback.
        error_msg = f"ERROR: Could not import {AGENT_FUNCTION_NAME} from final_agent.py: {e}"
        print(error_msg)
    else:
        # final_agent.py was found but failed while executing its top level.
        error_msg = f"ERROR during import or initial setup in final_agent.py: {e}"
        print(error_msg)
        traceback.print_exc()
    AGENT_LOAD_ERROR = error_msg

if not AGENT_AVAILABLE:
    def run_gaia_task(task_description: str) -> str:
        """Stand-in for the real agent when it could not be loaded.

        Ignores ``task_description`` and returns an error string that embeds
        the recorded load failure, so callers always get a ``str`` back.
        """
        print(f"Executing dummy {AGENT_FUNCTION_NAME} because agent failed to load.")
        return f"ERROR: Agent function '{AGENT_FUNCTION_NAME}' could not be loaded. Details: {AGENT_LOAD_ERROR}"

# --- Agent Runner Class ---
class AgentRunner:
    """Callable wrapper that forwards a question to ``run_gaia_task``.

    Calling an instance never raises for agent failures: any exception from
    the agent function is logged and converted into an ``ERROR: ...`` string.
    """

    def __init__(self):
        print("AgentRunner initialized.")
        if AGENT_AVAILABLE:
            return
        print(f"WARNING: Agent function failed to load during startup. Error: {AGENT_LOAD_ERROR}")

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its answer as a string.

        Args:
            question: The task text handed to the agent as ``task_description``.

        Returns:
            ``str()`` of whatever the agent returned, or an error message if
            the agent call (or the conversion/logging of its result) raised.
        """
        preview = question[:100]  # Only the first 100 characters are logged.
        print(f"\n--- AgentRunner received question: {preview}... ---")
        try:
            answer_text = str(run_gaia_task(task_description=question))
            print(f"--- AgentRunner returning answer: {answer_text} ---")
        except Exception as e:
            print(f"!!! ERROR calling {AGENT_FUNCTION_NAME} function: {e} !!!")
            traceback.print_exc()
            return f"ERROR: Agent function '{AGENT_FUNCTION_NAME}' failed during execution - {e}"
        else:
            return answer_text

# --- Submission Logic ---
def _format_http_error(error):
    """Describe a failed HTTP response carried by a ``requests`` HTTPError.

    Prefers the ``detail`` field of a JSON object body; falls back to the raw
    response text when the body is not JSON or is JSON of another shape.
    """
    response = error.response
    summary = f"Server error {response.status_code}."
    try:
        payload = response.json()
    except ValueError:
        # ValueError is the common base of every JSON decode error requests
        # can raise, including requests.exceptions.JSONDecodeError.
        return f"{summary} Response: {response.text[:200]}"
    if isinstance(payload, dict):
        return f"{summary} Detail: {payload.get('detail', response.text)}"
    # Valid JSON but not an object (e.g. a list or string): no 'detail' key to read.
    return f"{summary} Detail: {response.text}"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each, and submit all answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stopped before any question was
        processed (not logged in, agent not loaded, fetch failure).
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    submit_timeout_s = 120

    # 1. Instantiate Agent Runner
    try:
        agent = AgentRunner()
    except Exception as e:
        print(f"Error instantiating AgentRunner: {e}")
        return f"Init error: {e}", None
    if not AGENT_AVAILABLE:
        # Do not submit placeholder answers when the real agent is missing.
        return f"Agent function '{AGENT_FUNCTION_NAME}' failed to load. Check logs. Error: {AGENT_LOAD_ERROR}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code URL N/A"
    print(f"Agent code reference: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Fetch error: {e}", None
    if not questions_data:
        print("Questions list empty.")
        return "Questions list empty.", None
    if not isinstance(questions_data, list):
        # The loop below needs a sequence of dict items; fail with a message
        # instead of crashing on an unexpected payload shape.
        status_message = f"Fetch error: expected a list of questions, got {type(questions_data).__name__}."
        print(status_message)
        return status_message, None
    question_count = len(questions_data)
    print(f"Fetched {question_count} questions.")

    # 3. Run Agent on each question
    results_log = []
    answers_payload = []
    print(f"Running agent on {question_count} questions...")
    for position, item in enumerate(questions_data, start=1):
        if not isinstance(item, dict):
            print(f"Skipping item: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        print(f"\n--- Processing Question {position}/{question_count} (ID: {task_id}) ---")
        if not task_id or question_text is None:
            print(f"Skipping item: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Keep going: a failed task is still recorded and submitted as an error string.
            print(f"!! Error running agent on task {task_id}: {e} !!")
            traceback.print_exc()
            submitted_answer = f"AGENT RUN ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        print("Agent produced no answers.")
        return "Agent produced no answers.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...")

    # 5. Submit
    try:
        response = requests.post(submit_url, json=submission_data, timeout=submit_timeout_s)
        response.raise_for_status()
        result_data = response.json()
        final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
                        f"Overall Score: {result_data.get('score', 'N/A')}% "
                        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
                        f"Message: {result_data.get('message', 'N/A')}")
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        status_message = f"Submission Failed: {_format_http_error(e)}"
    except requests.exceptions.Timeout:
        status_message = f"Submission Failed: The request timed out ({submit_timeout_s} seconds)."
    except Exception as e:
        status_message = f"Submission Failed: Unexpected error - {e}"
        traceback.print_exc()
    print(status_message)
    return status_message, results_df


# --- Build Gradio Interface ---
with gr.Blocks() as demo:
    # Page title followed by the usage instructions shown to the user.
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Ensure your agent logic is in `final_agent.py` (exposing the `run_gaia_task` function) and dependencies in `requirements.txt`. Set secrets in Space settings (GROQ_API_KEY, TAVILY_API_KEY, OPENAI_API_KEY).
        2. Log in to Hugging Face using the button below.
        3. Click 'Run Evaluation & Submit All Answers' to run your agent. Check Logs for detailed progress.
        ---
        **Disclaimers:** Execution can take significant time depending on the number of questions and agent complexity.
        """
    )
    login_button = gr.LoginButton() # Hugging Face login control; this handle is not referenced again in this file
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # The two outputs below receive the (status, table) tuple returned by run_and_submit_all.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are declared on purpose: the handler's only parameter is
    # `profile: gr.OAuthProfile | None`, which Gradio is expected to supply from
    # the login session rather than from a UI component (see Gradio OAuth docs).
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# --- Main execution block ---
if __name__ == "__main__":
    # Startup banner: the title framed by two 30-dash rules.
    print("\n{0} App Starting {0}".format("-" * 30))
    if not AGENT_AVAILABLE:
        # The UI is still launched so the load failure can be seen in the logs.
        print(
            f"CRITICAL WARNING: Agent function '{AGENT_FUNCTION_NAME}' could not be loaded. The app will run but agent calls will fail.",
            f"Load Error Details: {AGENT_LOAD_ERROR}",
            sep="\n",
        )
    print("Launching Gradio Interface...")
    demo.launch(debug=False, share=False)