# Final_Assignment_Template / OpenRouter_Agent.py
# ABVM's picture
# Update OpenRouter_Agent.py
# b4c715a verified
"""High level multi-agent system powered by OpenRouter models.
This module sets up a manager agent that delegates tasks to specialized
web and information agents. It relies on the ``smolagents`` framework and
OpenRouter API models for language generation and verification.
"""
from smolagents import (
CodeAgent,
VisitWebpageTool,
WebSearchTool,
WikipediaSearchTool,
PythonInterpreterTool,
FinalAnswerTool,
OpenAIServerModel,
Tool,
)
from smolagents.utils import encode_image_base64, make_image_url
#from vision_tool import image_reasoning_tool
import os
# Value of the HF_API_TOKEN environment variable (``None`` when unset).
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Disabled Space-backed tools, kept for reference.
# audio_transcribe_tool = Tool.from_space(
#     space_id = "hf-audio/whisper-large-v3",
#     name = "audio_to_text",
#     description = "Transcribe long-form YouTube videos or audio inputs. Paste the URL to a YouTube video or upload audio file to get the transcript.",
# )
# object_detection_tool = Tool.from_space(
#     space_id = "stevengrove/YOLO-World",
#     name = "Real-Time Open-Vocabulary Object Detector",
#     description = "Detect objects in images or videos."
# )

# The OpenRouter key is mandatory: fail at import time when it is missing
# or empty rather than later, on the first model call.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")

# Keyword arguments shared by every ``OpenAIServerModel`` built in this module.
common = {
    "api_base": "https://openrouter.ai/api/v1",
    "api_key": OPENROUTER_API_KEY,
    # "extra_body": {"usage": {"include": True}},
}
class MultiAgentSystem:
    """Coordinate a manager agent and two specialised worker agents.

    The constructor builds three OpenRouter-backed models and three
    ``CodeAgent`` instances:

    * ``web_agent`` -- web search, page visiting and Wikipedia lookup.
    * ``info_agent`` -- Python-based parsing, calculation and data handling.
    * ``manager_agent`` -- owns the two agents above as managed agents and
      is the entry point used by ``__call__``.

    ``gemini_model`` is instantiated as well, but it is only referenced by
    the answer-verification hook that is currently commented out.
    """

    # Modules that agent-generated code may import. Every entry has to be an
    # importable *module* name: PyPI distribution names such as
    # ``wikipedia-api`` or class names such as ``BeautifulSoup`` can never
    # match an ``import`` statement.
    _WEB_AGENT_IMPORTS = (
        "markdownify",
        "json",
        "requests",
        "urllib.request",
        "urllib.parse",
        "wikipediaapi",  # import name of the ``wikipedia-api`` distribution
    )
    _INFO_AGENT_IMPORTS = (
        "numpy",
        "math",
        "pytesseract",
        "PIL",
        "chess",
        "bs4",  # provides ``from bs4 import BeautifulSoup``
        "openpyxl",
        "lxml",
    )
    _MANAGER_AGENT_IMPORTS = (
        "json",
        "pandas",
        "numpy",
    )

    def __init__(self):
        """Create the models and wire up the three agents.

        All models share the module-level ``common`` keyword arguments
        (OpenRouter base URL and API key).
        """
        self.deepseek_model = OpenAIServerModel(
            model_id="deepseek/deepseek-r1-0528:free",
            **common,
        )
        self.qwen_model = OpenAIServerModel(
            model_id="qwen/qwen-2.5-coder-32b-instruct:free",
            **common,
        )
        self.gemini_model = OpenAIServerModel(
            model_id="google/gemini-2.0-flash-exp:free",
            **common,
        )

        self.web_agent = CodeAgent(
            model=self.qwen_model,
            tools=[WebSearchTool(), VisitWebpageTool(), WikipediaSearchTool()],
            name="web_agent",
            description=(
                "You are a web browsing agent. Whenever the given {task} involves browsing "
                "the web or a specific website such as Wikipedia or YouTube, you will use "
                "the provided tools. For web-based factual and retrieval tasks, be as precise and source-reliable as possible."
            ),
            additional_authorized_imports=list(self._WEB_AGENT_IMPORTS),
            verbosity_level=0,
            max_steps=10,
        )

        # NOTE(review): the description below mentions audio_transcribe_tool
        # and object_detection_tool, but only PythonInterpreterTool is passed
        # in ``tools`` -- confirm whether the wording should be updated.
        self.info_agent = CodeAgent(
            model=self.qwen_model,
            tools=[PythonInterpreterTool()],
            name="info_agent",
            description=(
                "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
                "You can also analyze images, videos and audio using available tools such as audio_transcribe_tool and object_detection_tool when needed. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
                "For image, video, audio tasks, use pytesseract, PIL, chess, or audio_transcribe_tool and object_detection_tool as required."
            ),
            additional_authorized_imports=list(self._INFO_AGENT_IMPORTS),
        )

        # NOTE(review): planning_interval (6) is larger than max_steps (4);
        # presumably no re-planning can happen within a run -- confirm this
        # is intended.
        self.manager_agent = CodeAgent(
            model=self.deepseek_model,
            tools=[FinalAnswerTool()],
            managed_agents=[self.web_agent, self.info_agent],
            name="manager_agent",
            description=(
                "You are the manager agent. **Respond with a single python code-block only**. "
                "Inside that block you must call the other agents via `agent(name)(task)` "
                "and end with `final_answer({...})`. **No natural language outside the block**"
            ),
            additional_authorized_imports=list(self._MANAGER_AGENT_IMPORTS),
            planning_interval=6,
            verbosity_level=2,
            # final_answer_checks=[self.check_reasoning],
            max_steps=4,
        )

    # Disabled answer-verification hook, kept for reference.
    # def check_reasoning(self, final_answer, agent_memory):
    #     model = self.gemini_model
    #     verification_prompt = (
    #         f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. "
    #         f"The proposed final answer is: {final_answer}. "
    #         "Please check that the reasoning process is correct: do they correctly answer the given task? "
    #         "First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not."
    #     )
    #     output = model(verification_prompt)
    #     print("Feedback: ", output)
    #     if "FAIL" in output:
    #         raise Exception(output)
    #     return True

    def __call__(self, task: str) -> str:
        """Run ``manager_agent`` on ``task`` and return its result.

        Args:
            task: The user task, forwarded unchanged to the manager agent.

        Returns:
            The value produced by ``self.manager_agent(task)``, passed
            through as-is.

        NOTE(review): the annotation says ``str`` but no coercion happens
        here -- confirm the manager always yields text.
        """
        return self.manager_agent(task)