"""High level multi-agent system powered by OpenRouter models. This module sets up a manager agent that delegates tasks to specialized web and information agents. It relies on the ``smolagent`` framework and OpenRouter API models for language generation and verification. """ from smolagents import ( CodeAgent, VisitWebpageTool, WebSearchTool, WikipediaSearchTool, PythonInterpreterTool, FinalAnswerTool, OpenAIServerModel, Tool, ) from smolagents.utils import encode_image_base64, make_image_url #from vision_tool import image_reasoning_tool import os HF_API_TOKEN = os.getenv("HF_API_TOKEN") #audio_transcribe_tool = Tool.from_space( # space_id = "hf-audio/whisper-large-v3", # name = "audio_to_text", # description = "Transcribe long-form YouTube videos or audio inputs. Paste the URL to a YouTube video or upload audio file to get the transcript.", #) #object_detection_tool = Tool.from_space( # space_id = "stevengrove/YOLO-World", # name = "Real-Time Open-Vocabulary Object Detector", # description = "Detect objects in images or videos." #) OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") if not OPENROUTER_API_KEY: raise EnvironmentError("OPENROUTER_API_KEY environment variable not set") common = dict( api_base="https://openrouter.ai/api/v1", api_key=OPENROUTER_API_KEY, #extra_body={"usage": {"include": True}} ) class MultiAgentSystem: """Coordinates specialized agents and their underlying models. The system instantiates a ``web_agent`` for browsing and data collection, an ``info_agent`` for computation and image reasoning, and a ``manager_agent`` that plans tasks and verifies answers using several OpenRouter models. """ def __init__(self): self.deepseek_model = OpenAIServerModel( model_id="deepseek/deepseek-r1-0528:free", **common, ) self.qwen_model = OpenAIServerModel( model_id="qwen/qwen-2.5-coder-32b-instruct:free", **common, ) self.gemini_model = OpenAIServerModel( model_id="google/gemini-2.0-flash-exp:free", **common, ) self.web_agent = CodeAgent( model =self.qwen_model, tools=[WebSearchTool(), VisitWebpageTool(), WikipediaSearchTool()], name="web_agent", description=( "You are a web browsing agent. Whenever the given {task} involves browsing " "the web or a specific website such as Wikipedia or YouTube, you will use " "the provided tools. For web-based factual and retrieval tasks, be as precise and source-reliable as possible." ), additional_authorized_imports=[ "markdownify", "json", "requests", "urllib.request", "urllib.parse", "wikipedia-api", ], verbosity_level=0, max_steps=10, ) self.info_agent = CodeAgent( model =self.qwen_model, tools=[PythonInterpreterTool()], name="info_agent", description=( "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. " "You can also analyze images, videos and audio using available tools such as audio_transcribe_tool and object_detection_tool when needed. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. " "For image, video, audio tasks, use pytesseract, PIL, chess, or audio_transcribe_tool and object_detection_tool as required." ), additional_authorized_imports=[ "numpy", "math", "pytesseract", "PIL", "chess", "bs4", "BeautifulSoup", "openpyxl", "lxml", ], ) self.manager_agent = CodeAgent( model =self.deepseek_model, tools=[FinalAnswerTool()], managed_agents=[self.web_agent, self.info_agent], name="manager_agent", description=( "You are the manager agent. **Respond with a single python code-block only**. " "Inside that block you must call the other agents via `agent(name)(task)` " "and end with `final_answer({...})`. **No natural language outside the block**" ), additional_authorized_imports=[ "json", "pandas", "numpy", ], planning_interval=6, verbosity_level=2, #final_answer_checks=[self.check_reasoning], max_steps=4, ) #def check_reasoning(self, final_answer, agent_memory): #model = self.gemini_model #verification_prompt = ( # f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. " # f"The proposed final answer is: {final_answer}. " # "Please check that the reasoning process is correct: do they correctly answer the given task? " #"First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not." #) #output = model(verification_prompt) #print("Feedback: ", output) #if "FAIL" in output: #raise Exception(output) #return True def __call__(self, task: str) -> str: """ Run the manager_agent on the given user task and return its final answer text. """ return self.manager_agent(task)