Final_Assignment

Sleeping

App Files Community

BiGuan committed 22 days ago

Commit

a223bb0

verified ·

1 Parent(s): 951a5f7

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -62

app.py CHANGED Viewed

@@ -12,14 +12,12 @@ from typing import TypedDict, Annotated, Sequence, List, Dict, Any, Generator
 from datetime import datetime
 import operator
-# LangChain / LangGraph
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
 from langchain_core.tools import tool
 from langgraph.graph import StateGraph, END
 from langgraph.prebuilt import ToolNode
 from langchain_core.utils.function_calling import convert_to_openai_function
-# 其他工具依赖
 from bs4 import BeautifulSoup
 from youtube_transcript_api import YouTubeTranscriptApi
@@ -32,10 +30,9 @@ AGICTO_API_KEY = os.getenv("AGICTO_API_KEY", "")
 QWEN_MODEL = "qwen3.5-35b-a3b"
 # =============================================================================
-# 进度监控器
 # =============================================================================
 class ProgressMonitor:
-    # ... 保持不变 ...
     def __init__(self):
         self.current = 0
         self.total = 0
@@ -79,10 +76,9 @@ class ProgressMonitor:
             return html
 # =============================================================================
-# Qwen LLM 封装
 # =============================================================================
 class QwenLLM:
-    # ... 保持不变 ...
     def __init__(self, model=QWEN_MODEL):
         self.model = model
         self.api_key = AGICTO_API_KEY
@@ -187,7 +183,7 @@ class QwenLLM:
         return formatted
 # =============================================================================
-# 工具定义
 # =============================================================================
 api_url_tasks = DEFAULT_API_URL
@@ -197,7 +193,6 @@ def _get_api_base():
         base = base[:-3]
     return base
-# --- 原有工具 ---
 @tool(description="搜索互联网信息，返回相关摘要。")
 def web_search(query: str) -> str:
     try:
@@ -315,20 +310,13 @@ def download_file_for_task(task_id: str) -> str:
             os.unlink(temp_path)
             return result
         else:
-            # 对于文本文件（包括 .py, .txt 等），直接返回文本内容
             return resp.text[:4000]
     except Exception as e:
         return f"文件下载失败: {e}"
-# --- 新增：维基百科搜索工具 ---
 @tool(description="在维基百科中搜索关键词，返回页面摘要或详细信息。")
 def search_wikipedia(query: str) -> str:
-    """
-    使用维基百科 API 搜索关键词。
-    首先尝试 opensearch 获取页面标题，然后用 extract 获取摘要。
-    """
     try:
-        # 第一步：搜索相关页面标题
         search_url = "https://en.wikipedia.org/w/api.php"
         params = {
             "action": "opensearch",
@@ -338,11 +326,10 @@ def search_wikipedia(query: str) -> str:
         }
         resp = requests.get(search_url, params=params, timeout=10)
         data = resp.json()
-        titles = data[1]  # 标题列表
         if not titles:
             return "维基百科未找到相关页面。"
         title = titles[0]
-        # 第二步：获取页面摘要
         extract_params = {
             "action": "query",
             "prop": "extracts",
@@ -354,52 +341,55 @@ def search_wikipedia(query: str) -> str:
         resp2 = requests.get(search_url, params=extract_params, timeout=10)
         data2 = resp2.json()
         pages = data2.get("query", {}).get("pages", {})
-        for page_id, page_info in pages.items():
             extract = page_info.get("extract", "")
             if extract:
-                # 返回前2000字符，避免过长
                 return f"Wikipedia - {title}:\n{extract[:2000]}"
         return f"维基百科页面 '{title}' 未提供摘要。"
     except Exception as e:
         return f"维基百科搜索失败: {e}"
 # =============================================================================
-# LangGraph 状态与节点
 # =============================================================================
 class AgentState(TypedDict):
     messages: Annotated[Sequence[BaseMessage], operator.add]
     final_answer: str
     task_id: str
-    tool_attempts: int
-# 所有工具（包含新增的 search_wikipedia）
-tools = [
-    search_wikipedia,       # 优先搜索维基百科
-    web_search,             # 备用网络搜索
-    web_scraper,
-    calculator,
-    analyze_image,
-    transcribe_audio,
-    get_youtube_transcript,
-    download_file_for_task
-]
 tool_node = ToolNode(tools)
 llm = QwenLLM()
 functions = [convert_to_openai_function(t) for t in tools]
 llm_with_tools = llm.bind_functions(functions)
 def agent_node(state: AgentState) -> dict:
     messages = state["messages"]
     task_id = state.get("task_id", "")
-    # 更新系统提示，强调维基百科、文件处理和 YouTube 工具的使用
     sys_prompt = f"""You are a helpful assistant answering GAIA Level 1 questions.
-IMPORTANT GUIDELINES:
-- For fact-based questions, first try to find the answer using the `search_wikipedia` tool. Only if Wikipedia fails, use `web_search` or other tools.
-- If the question provides a file (image, audio, or code), use `download_file_for_task` with the given task_id to retrieve it. The tool will automatically analyze images, transcribe audio, or return text for Python/text files.
-- For YouTube links, use `get_youtube_transcript` to obtain the captions.
-- When you have the final answer, output ONLY the answer string (a word, number, short phrase, or letter). Do NOT include any extra text, explanations, or "FINAL ANSWER:".
-Current task ID: {task_id}. If the question requires a file, use download_file_for_task with task_id="{task_id}"."""
     full = [SystemMessage(content=sys_prompt)] + list(messages)
     response = llm_with_tools.invoke(full)
     return {"messages": [response]}
@@ -408,28 +398,26 @@ def should_continue(state: AgentState) -> str:
     messages = state["messages"]
     last = messages[-1]
     tool_attempts = state.get("tool_attempts", 0)
-    MAX_TOOL_CALLS = 3   # 限制最多3次工具调用，避免循环
     if tool_attempts >= MAX_TOOL_CALLS:
         return "finish"
     if hasattr(last, "additional_kwargs") and "function_call" in last.additional_kwargs:
         return "tools"
     tool_msg_count = sum(1 for m in messages if isinstance(m, ToolMessage))
     if tool_msg_count == 0:
         return "force_tool"
-    # 如果 LLM 已经给出了一个简洁答案，结束
-    content = last.content
-    if "?" not in content and len(content.strip()) < 100:
-        return "finish"
     return "finish"
 def force_tool_node(state: AgentState) -> dict:
     new_msg = HumanMessage(
-        content="You haven't used any tool yet. Please use an appropriate tool (e.g., search_wikipedia, download_file_for_task) to find the answer."
     )
     return {"messages": [new_msg]}
@@ -437,21 +425,31 @@ def increment_tool_count(state: AgentState) -> dict:
     return {"tool_attempts": state.get("tool_attempts", 0) + 1}
 def finish_node(state: AgentState) -> dict:
     last = state["messages"][-1]
     content = last.content
-    answer = content.strip().split("\n")[-1].strip()
-    if "FINAL ANSWER:" in answer:
-        answer = answer.split("FINAL ANSWER:")[-1].strip()
-    if not answer:
-        for m in reversed(state["messages"]):
-            if isinstance(m, AIMessage) and m.content.strip():
-                answer = m.content.strip().split("\n")[-1].strip()
                 break
-    if not answer:
-        if state.get("tool_attempts", 0) >= 3:
-            answer = "Unable to determine answer: max tool calls reached."
         else:
             answer = "Unable to determine answer: insufficient information."
@@ -461,19 +459,28 @@ def build_graph():
     workflow = StateGraph(AgentState)
     workflow.add_node("agent", agent_node)
     workflow.add_node("tools", tool_node)
-    workflow.add_node("finish", finish_node)
     workflow.add_node("force_tool", force_tool_node)
     workflow.add_node("count_tools", increment_tool_count)
     workflow.set_entry_point("agent")
     workflow.add_conditional_edges(
         "agent",
         should_continue,
-        {"tools": "tools", "force_tool": "force_tool", "finish": "finish"}
     )
     workflow.add_edge("tools", "count_tools")
     workflow.add_edge("count_tools", "agent")
     workflow.add_edge("force_tool", "agent")
     workflow.add_edge("finish", END)
     return workflow.compile()
@@ -581,8 +588,8 @@ with gr.Blocks(title="GAIA Agent") as demo:
     gr.Markdown("""
     # 🤖 GAIA Level 1 Agent (LangGraph + Qwen)
     **模型:** Qwen3.5-35B-A3B | **API:** agicto.com
-    点击按钮获取题目，Agent 自动调用工具并回答，最后提交评分。
-    **新增维基百科搜索、文件处理（图片/音频/代码）、YouTube 字幕提取。**
     """)
     gr.LoginButton()
     run_btn = gr.Button("🚀 运行评测并提交", variant="primary")

 from datetime import datetime
 import operator
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
 from langchain_core.tools import tool
 from langgraph.graph import StateGraph, END
 from langgraph.prebuilt import ToolNode
 from langchain_core.utils.function_calling import convert_to_openai_function
 from bs4 import BeautifulSoup
 from youtube_transcript_api import YouTubeTranscriptApi
 QWEN_MODEL = "qwen3.5-35b-a3b"
 # =============================================================================
+# 进度监控器（不变）
 # =============================================================================
 class ProgressMonitor:
     def __init__(self):
         self.current = 0
         self.total = 0
             return html
 # =============================================================================
+# Qwen LLM 封装（不变）
 # =============================================================================
 class QwenLLM:
     def __init__(self, model=QWEN_MODEL):
         self.model = model
         self.api_key = AGICTO_API_KEY
         return formatted
 # =============================================================================
+# 工具定义（同之前，包含 search_wikipedia 等）
 # =============================================================================
 api_url_tasks = DEFAULT_API_URL
         base = base[:-3]
     return base
 @tool(description="搜索互联网信息，返回相关摘要。")
 def web_search(query: str) -> str:
     try:
             os.unlink(temp_path)
             return result
         else:
             return resp.text[:4000]
     except Exception as e:
         return f"文件下载失败: {e}"
 @tool(description="在维基百科中搜索关键词，返回页面摘要或详细信息。")
 def search_wikipedia(query: str) -> str:
     try:
         search_url = "https://en.wikipedia.org/w/api.php"
         params = {
             "action": "opensearch",
         }
         resp = requests.get(search_url, params=params, timeout=10)
         data = resp.json()
+        titles = data[1]
         if not titles:
             return "维基百科未找到相关页面。"
         title = titles[0]
         extract_params = {
             "action": "query",
             "prop": "extracts",
         resp2 = requests.get(search_url, params=extract_params, timeout=10)
         data2 = resp2.json()
         pages = data2.get("query", {}).get("pages", {})
+        for page_info in pages.values():
             extract = page_info.get("extract", "")
             if extract:
                 return f"Wikipedia - {title}:\n{extract[:2000]}"
         return f"维基百科页面 '{title}' 未提供摘要。"
     except Exception as e:
         return f"维基百科搜索失败: {e}"
 # =============================================================================
+# LangGraph 状态与节点（允许多次工具调用，最大3次）
 # =============================================================================
 class AgentState(TypedDict):
+    # State dictionary shared by the graph nodes defined in this file.
+    # `messages` is annotated with operator.add; LangGraph uses such an annotation
+    # as the reducer, so message lists returned by nodes are concatenated onto the history.
     messages: Annotated[Sequence[BaseMessage], operator.add]
+    # Written by finish_node as the extracted answer string.
     final_answer: str
+    # Read by agent_node and interpolated into the system prompt.
     task_id: str
+    tool_attempts: int  # number of tool calls used so far
+# Tools exposed to the model; this one list feeds both the ToolNode executor
+# and the function schemas built below.
+tools = [search_wikipedia, web_search, web_scraper, calculator,
+         analyze_image, transcribe_audio, get_youtube_transcript, download_file_for_task]
 tool_node = ToolNode(tools)
 llm = QwenLLM()
+# OpenAI-style function descriptions derived from the tool objects.
 functions = [convert_to_openai_function(t) for t in tools]
+# NOTE(review): QwenLLM.bind_functions is defined outside this excerpt; presumably it
+# attaches the schemas to every request made through the returned object — confirm.
 llm_with_tools = llm.bind_functions(functions)
+MAX_TOOL_CALLS = 3   # maximum number of tool calls allowed
 def agent_node(state: AgentState) -> dict:
+    """Run one model turn over the conversation so far.
+
+    Builds a system prompt that embeds the current task id, prepends it to the
+    message history, and invokes the tool-bound LLM.
+
+    Args:
+        state: Graph state; reads "messages" and "task_id" (falls back to "" when absent).
+
+    Returns:
+        A dict whose "messages" entry is a one-element list holding the LLM response.
+    """
     messages = state["messages"]
     task_id = state.get("task_id", "")
+    # System prompt: steer the model toward using tools, but require a final answer in the end (no chit-chat)
     sys_prompt = f"""You are a helpful assistant answering GAIA Level 1 questions.
+You can use the following tools to find information:
+- search_wikipedia: search Wikipedia for facts.
+- web_search: general web search.
+- web_scraper: fetch content from a URL.
+- download_file_for_task: download a file associated with the current task (task_id: {task_id}). This can handle images, audio, and Python/text files.
+- analyze_image: describe an image given a URL or base64 data.
+- transcribe_audio: transcribe audio from a path or URL.
+- get_youtube_transcript: get captions from a YouTube video.
+- calculator: evaluate a mathematical expression.
+Instructions:
+1. Use the most appropriate tool(s) to gather the information needed to answer the question.
+2. If you need to follow up (e.g., search then scrape a specific page), you may use another tool.
+3. Once you have enough information, output ONLY the final answer as a short string (a word, number, date, or phrase). Do NOT include explanations, greetings, or the phrase "FINAL ANSWER:".
+4. If after using tools you still cannot find the answer, output exactly: "Unable to determine answer: insufficient information."
+5. Do not make up an answer; only respond based on the information you retrieved.
+Current task ID: {task_id}."""
     full = [SystemMessage(content=sys_prompt)] + list(messages)
     response = llm_with_tools.invoke(full)
     return {"messages": [response]}
     messages = state["messages"]
     last = messages[-1]
     tool_attempts = state.get("tool_attempts", 0)
+    # 如果已达到最大调用次数，强制进入 finish
     if tool_attempts >= MAX_TOOL_CALLS:
         return "finish"
+    # 如果 LLM 请求了工具调用，则去执行工具
     if hasattr(last, "additional_kwargs") and "function_call" in last.additional_kwargs:
         return "tools"
+    # 尚未使用过任何工具？强制要求使用工具（确保至少一次）
     tool_msg_count = sum(1 for m in messages if isinstance(m, ToolMessage))
     if tool_msg_count == 0:
         return "force_tool"
+    # 否则，LLM 已经给出了最终答案，进入 finish
     return "finish"
 def force_tool_node(state: AgentState) -> dict:
+    """Produce a reminder message asking the model to call a tool.
+
+    Args:
+        state: Graph state; not read by this node.
+
+    Returns:
+        A dict whose "messages" entry is a one-element list holding the reminder HumanMessage.
+    """
     new_msg = HumanMessage(
+        content="You haven't used any tool yet. Please use an appropriate tool to find the answer."
     )
     return {"messages": [new_msg]}
     return {"tool_attempts": state.get("tool_attempts", 0) + 1}
 def finish_node(state: AgentState) -> dict:
+    """从最后一条 AI 消息中提取最终答案，并清理格式"""
     last = state["messages"][-1]
     content = last.content
+    # 如果已经包含标准错误信息，直接返回
+    if "Unable to determine answer" in content:
+        return {"final_answer": content.split("\n")[0].strip()}
+    # 去除可能的前缀
+    answer = content.split("FINAL ANSWER:")[-1].strip()
+    # 尝试提取简洁答案：如果过长或包含问句，取第一句
+    if len(answer) > 50 or "?" in answer:
+        sentences = re.split(r'(?<=[.!?])\s+', answer)
+        for s in sentences:
+            s = s.strip()
+            if s and "?" not in s and not s.startswith(("Let me", "I ", "You ", "Please")):
+                answer = s
                 break
+        else:
+            answer = answer[:100].strip()
+    # 若最终答案仍为空或无效，给出错误原因
+    if not answer or answer in ("模型调用失败",):
+        if state.get("tool_attempts", 0) >= MAX_TOOL_CALLS:
+            answer = "Unable to determine answer: maximum tool calls reached."
         else:
             answer = "Unable to determine answer: insufficient information."
     workflow = StateGraph(AgentState)
     workflow.add_node("agent", agent_node)
     workflow.add_node("tools", tool_node)
     workflow.add_node("force_tool", force_tool_node)
     workflow.add_node("count_tools", increment_tool_count)
+    workflow.add_node("finish", finish_node)
     workflow.set_entry_point("agent")
     workflow.add_conditional_edges(
         "agent",
         should_continue,
+        {
+            "tools": "tools",
+            "force_tool": "force_tool",
+            "finish": "finish"
+        }
     )
+    # 工具调用后计数，然后返回 agent 继续思考
     workflow.add_edge("tools", "count_tools")
     workflow.add_edge("count_tools", "agent")
+    # force_tool 后返回 agent 重新决策
     workflow.add_edge("force_tool", "agent")
+    # finish 结束
     workflow.add_edge("finish", END)
     return workflow.compile()
     gr.Markdown("""
     # 🤖 GAIA Level 1 Agent (LangGraph + Qwen)
     **模型:** Qwen3.5-35B-A3B | **API:** agicto.com
+    点击按钮获取题目，Agent 可调用多个工具（最多3次）以获取答案，最后提交评分。
+    **工具：** 维基百科、网页搜索/抓取、图片分析、音频转录、YouTube字幕、文件下载。
     """)
     gr.LoginButton()
     run_btn = gr.Button("🚀 运行评测并提交", variant="primary")