Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / OpenRouter_Agent.py

ABVM

Update OpenRouter_Agent.py

b4c715a verified 8 months ago

raw

history blame contribute delete

5.84 kB

	"""High level multi-agent system powered by OpenRouter models.

	This module sets up a manager agent that delegates tasks to specialized
	web and information agents. It relies on the ``smolagents`` framework and
	OpenRouter API models for language generation and verification.
	"""

	from smolagents import (
	CodeAgent,
	VisitWebpageTool,
	WebSearchTool,
	WikipediaSearchTool,
	PythonInterpreterTool,
	FinalAnswerTool,
	OpenAIServerModel,
	Tool,
	)
	from smolagents.utils import encode_image_base64, make_image_url
	#from vision_tool import image_reasoning_tool
	import os
	HF_API_TOKEN = os.getenv("HF_API_TOKEN")

	#audio_transcribe_tool = Tool.from_space(
	# space_id = "hf-audio/whisper-large-v3",
	# name = "audio_to_text",
	# description = "Transcribe long-form YouTube videos or audio inputs. Paste the URL to a YouTube video or upload audio file to get the transcript.",

	#)

	#object_detection_tool = Tool.from_space(
	# space_id = "stevengrove/YOLO-World",
	# name = "Real-Time Open-Vocabulary Object Detector",
	# description = "Detect objects in images or videos."
	#)


	# Fail fast at import time when the OpenRouter credential is missing or empty.
	OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
	if not OPENROUTER_API_KEY:
	    raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")

	# Connection settings unpacked into the OpenAIServerModel constructors below.
	common = {
	    "api_base": "https://openrouter.ai/api/v1",
	    "api_key": OPENROUTER_API_KEY,
	    # "extra_body": {"usage": {"include": True}},
	}


	class MultiAgentSystem:
	    """Coordinate specialized agents and their underlying OpenRouter models.

	    Three ``OpenAIServerModel`` instances are built from the shared ``common``
	    connection settings and wired into three ``CodeAgent`` objects:

	    * ``web_agent`` -- web search, page visiting and Wikipedia lookup.
	    * ``info_agent`` -- parsing, computation and OCR through Python code.
	    * ``manager_agent`` -- delegates to the two agents above and produces the
	      final answer.

	    Answer verification (``check_reasoning``) is currently disabled;
	    ``gemini_model`` is only referenced by that disabled check.
	    """

	    def __init__(self) -> None:
	        """Instantiate the models and the web, info and manager agents."""
	        self.deepseek_model = OpenAIServerModel(
	            model_id="deepseek/deepseek-r1-0528:free",
	            **common,
	        )
	        self.qwen_model = OpenAIServerModel(
	            model_id="qwen/qwen-2.5-coder-32b-instruct:free",
	            **common,
	        )
	        # Only used by the disabled check_reasoning verification below.
	        self.gemini_model = OpenAIServerModel(
	            model_id="google/gemini-2.0-flash-exp:free",
	            **common,
	        )

	        self.web_agent = CodeAgent(
	            model=self.qwen_model,
	            tools=[WebSearchTool(), VisitWebpageTool(), WikipediaSearchTool()],
	            name="web_agent",
	            description=(
	                "You are a web browsing agent. Whenever the given {task} involves browsing "
	                "the web or a specific website such as Wikipedia or YouTube, you will use "
	                "the provided tools. For web-based factual and retrieval tasks, be as precise and source-reliable as possible."
	            ),
	            additional_authorized_imports=[
	                "markdownify",
	                "json",
	                "requests",
	                "urllib.request",
	                "urllib.parse",
	                # Importable module name of the "wikipedia-api" distribution;
	                # the hyphenated package name can never match an import.
	                "wikipediaapi",
	            ],
	            verbosity_level=0,
	            max_steps=10,
	        )

	        self.info_agent = CodeAgent(
	            model=self.qwen_model,
	            tools=[PythonInterpreterTool()],
	            name="info_agent",
	            # The description only advertises capabilities this agent really
	            # has: the audio/object-detection tools are disabled in this
	            # module and are not part of ``tools`` above.
	            description=(
	                "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
	                "You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
	                "For image tasks, use pytesseract, PIL, or chess as required."
	            ),
	            additional_authorized_imports=[
	                "numpy",
	                "math",
	                "pytesseract",
	                "PIL",
	                "chess",
	                # "bs4" is the importable module that provides BeautifulSoup.
	                "bs4",
	                "openpyxl",
	                "lxml",
	            ],
	        )

	        self.manager_agent = CodeAgent(
	            model=self.deepseek_model,
	            tools=[FinalAnswerTool()],
	            managed_agents=[self.web_agent, self.info_agent],
	            name="manager_agent",
	            description=(
	                "You are the manager agent. Respond with a single python code-block only. "
	                "Inside that block you must call the other agents via `agent(name)(task)` "
	                "and end with `final_answer({...})`. No natural language outside the block"
	            ),
	            additional_authorized_imports=[
	                "json",
	                "pandas",
	                "numpy",
	            ],
	            # NOTE(review): planning_interval (6) is larger than max_steps (4),
	            # so presumably no re-planning step is ever reached -- confirm
	            # this is intended.
	            planning_interval=6,
	            verbosity_level=2,
	            # final_answer_checks=[self.check_reasoning],
	            max_steps=4,
	        )

	    # Disabled answer verification, kept for reference. Re-enable together
	    # with the ``final_answer_checks`` argument above.
	    #
	    # def check_reasoning(self, final_answer, agent_memory):
	    #     model = self.gemini_model
	    #     verification_prompt = (
	    #         f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. "
	    #         f"The proposed final answer is: {final_answer}. "
	    #         "Please check that the reasoning process is correct: do they correctly answer the given task? "
	    #         "First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not."
	    #     )
	    #     output = model(verification_prompt)
	    #     print("Feedback: ", output)
	    #     if "FAIL" in output:
	    #         raise Exception(output)
	    #     return True

	    def __call__(self, task: str) -> str:
	        """Run the manager agent on ``task`` and return its final answer text.

	        Args:
	            task: The user task, passed unchanged to ``manager_agent``.

	        Returns:
	            The manager's final answer. A result that is not already a
	            string is converted with ``str()`` so the declared return type
	            always holds.
	        """
	        answer = self.manager_agent(task)
	        return answer if isinstance(answer, str) else str(answer)