Final_Assignment_Template

Sleeping

App Files Files Community

ABVM committed on Jun 22, 2025

Commit

2253aff

verified ·

1 Parent(s): 623cd35

Update OpenRouter_Agent.py

Browse files

Files changed (1) hide show

OpenRouter_Agent.py +19 -5

OpenRouter_Agent.py CHANGED Viewed

@@ -12,12 +12,26 @@ from smolagents import (
     WikipediaSearchTool,
     PythonInterpreterTool,
     FinalAnswerTool,
-    OpenAIServerModel
 )
 from smolagents.utils import encode_image_base64, make_image_url
-from vision_tool import image_reasoning_tool
 import os
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 if not OPENROUTER_API_KEY:
     raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")
@@ -74,12 +88,12 @@ class MultiAgentSystem:
         self.info_agent = CodeAgent(
             model =self.qwen_model,
-            tools=[PythonInterpreterTool(), image_reasoning_tool],
             name="info_agent",
             description=(
                 "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
-                "You can also analyze images using a vision model. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
-                "For image or chess tasks, use pytesseract, PIL, chess, or the image_reasoning_tool as required."
             ),
             additional_authorized_imports=[
                 "numpy",

     WikipediaSearchTool,
     PythonInterpreterTool,
     FinalAnswerTool,
+    OpenAIServerModel,
+    Tool,
 )
 from smolagents.utils import encode_image_base64, make_image_url
+#from vision_tool import image_reasoning_tool
 import os
+# Wrappers around remote Hugging Face Spaces, built at module import time.
+# A CodeAgent calls each tool from generated Python code by its `name`, so the
+# name must be a valid Python identifier (no spaces or hyphens).
+audio_transcribe_tool = Tool.from_space(
+    "openai/whisper",
+    name="audio_to_text",
+    description="Transcribe long-form YouTube videos or audio inputs. Paste the URL to a YouTube video or upload audio file to get the transcript.",
+)
+object_detection_tool = Tool.from_space(
+    "stevengrove/YOLO-World",
+    # Previously "Real-Time Open-Vocabulary Object Detector", which is not a
+    # valid identifier and therefore not callable from agent-generated code.
+    # This name matches the one the info_agent description refers to.
+    name="object_detection_tool",
+    description="Detect objects in images or videos.",
+)
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 if not OPENROUTER_API_KEY:
     raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")
         self.info_agent = CodeAgent(
             model =self.qwen_model,
+            tools=[PythonInterpreterTool(), audio_transcribe_tool, object_detection_tool ],
             name="info_agent",
             description=(
                 "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
+                "You can also analyze images, videos and audio using available tools such as audio_transcribe_tool and object_detection_tool when needed. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
+                "For image, video, audio tasks, use pytesseract, PIL, chess, or audio_transcribe_tool and object_detection_tool as required."
             ),
             additional_authorized_imports=[
                 "numpy",