ABVM committed on
Commit
2253aff
·
verified ·
1 Parent(s): 623cd35

Update OpenRouter_Agent.py

Browse files
Files changed (1) hide show
  1. OpenRouter_Agent.py +19 -5
OpenRouter_Agent.py CHANGED
@@ -12,12 +12,26 @@ from smolagents import (
12
  WikipediaSearchTool,
13
  PythonInterpreterTool,
14
  FinalAnswerTool,
15
- OpenAIServerModel
 
16
  )
17
  from smolagents.utils import encode_image_base64, make_image_url
18
- from vision_tool import image_reasoning_tool
19
  import os
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
22
  if not OPENROUTER_API_KEY:
23
  raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")
@@ -74,12 +88,12 @@ class MultiAgentSystem:
74
 
75
  self.info_agent = CodeAgent(
76
  model =self.qwen_model,
77
- tools=[PythonInterpreterTool(), image_reasoning_tool],
78
  name="info_agent",
79
  description=(
80
  "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
81
- "You can also analyze images using a vision model. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
82
- "For image or chess tasks, use pytesseract, PIL, chess, or the image_reasoning_tool as required."
83
  ),
84
  additional_authorized_imports=[
85
  "numpy",
 
12
  WikipediaSearchTool,
13
  PythonInterpreterTool,
14
  FinalAnswerTool,
15
+ OpenAIServerModel,
16
+ Tool,
17
  )
18
  from smolagents.utils import encode_image_base64, make_image_url
19
+ #from vision_tool import image_reasoning_tool
20
  import os
21
 
22
+ audio_transcribe_tool = Tool.from_space(
23
+ "openai/whisper",
24
+ name = "audio_to_text",
25
+ description = "Transcribe long-form YouTube videos or audio inputs. Paste the URL to a YouTube video or upload audio file to get the transcript."
26
+
27
+ )
28
+
29
+ object_detection_tool = Tool.from_space(
30
+ "stevengrove/YOLO-World",
31
+ name = "Real-Time Open-Vocabulary Object Detector",
32
+ description = "Detect objects in images or videos."
33
+ )
34
+
35
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
36
  if not OPENROUTER_API_KEY:
37
  raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")
 
88
 
89
  self.info_agent = CodeAgent(
90
  model =self.qwen_model,
91
+ tools=[PythonInterpreterTool(), audio_transcribe_tool, object_detection_tool ],
92
  name="info_agent",
93
  description=(
94
  "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
95
+ "You can also analyze images, videos and audio using available tools such as audio_transcribe_tool and object_detection_tool when needed. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
96
+ "For image, video, audio tasks, use pytesseract, PIL, chess, or audio_transcribe_tool and object_detection_tool as required."
97
  ),
98
  additional_authorized_imports=[
99
  "numpy",