Spaces:
Sleeping
Sleeping
| """High level multi-agent system powered by OpenRouter models. | |
| This module sets up a manager agent that delegates tasks to specialized | |
| web and information agents. It relies on the ``smolagents`` framework and | |
| OpenRouter API models for language generation and verification. | |
| """ | |
| from smolagents import ( | |
| CodeAgent, | |
| VisitWebpageTool, | |
| WebSearchTool, | |
| WikipediaSearchTool, | |
| PythonInterpreterTool, | |
| FinalAnswerTool, | |
| OpenAIServerModel, | |
| Tool, | |
| ) | |
| from smolagents.utils import encode_image_base64, make_image_url | |
| #from vision_tool import image_reasoning_tool | |
| import os | |
# Hugging Face token taken from the environment. Nothing else in this module
# references it while the Space-backed tools below stay disabled.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Disabled Space-backed tools, kept for reference:
# audio_transcribe_tool = Tool.from_space(
#     space_id="hf-audio/whisper-large-v3",
#     name="audio_to_text",
#     description="Transcribe long-form YouTube videos or audio inputs. Paste the URL to a YouTube video or upload audio file to get the transcript.",
# )
# object_detection_tool = Tool.from_space(
#     space_id="stevengrove/YOLO-World",
#     name="Real-Time Open-Vocabulary Object Detector",
#     description="Detect objects in images or videos.",
# )

# The OpenRouter key is mandatory: fail at import time when it is missing or
# empty rather than on the first model call.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    raise EnvironmentError("OPENROUTER_API_KEY environment variable not set")

# Keyword arguments shared by every OpenAIServerModel created in this module.
common = {
    "api_base": "https://openrouter.ai/api/v1",
    "api_key": OPENROUTER_API_KEY,
    # "extra_body": {"usage": {"include": True}},
}
class MultiAgentSystem:
    """Coordinate specialized agents and their underlying OpenRouter models.

    The system instantiates a ``web_agent`` for browsing and data collection,
    an ``info_agent`` for computation and data parsing, and a
    ``manager_agent`` that delegates work to both and produces the final
    answer.

    Attributes:
        deepseek_model: Model driving ``manager_agent``.
        qwen_model: Model shared by ``web_agent`` and ``info_agent``.
        gemini_model: Model reserved for the (currently disabled) answer
            verification hook; no active agent uses it.
        web_agent: ``CodeAgent`` with web search, page visit and Wikipedia
            search tools.
        info_agent: ``CodeAgent`` with a Python interpreter tool.
        manager_agent: ``CodeAgent`` managing the two agents above.
    """

    def __init__(self):
        """Build the three models and the three agents.

        Relies on the module-level ``common`` dict for the OpenRouter
        endpoint and API key.
        """
        self.deepseek_model = OpenAIServerModel(
            model_id="deepseek/deepseek-r1-0528:free",
            **common,
        )
        self.qwen_model = OpenAIServerModel(
            model_id="qwen/qwen-2.5-coder-32b-instruct:free",
            **common,
        )
        self.gemini_model = OpenAIServerModel(
            model_id="google/gemini-2.0-flash-exp:free",
            **common,
        )

        self.web_agent = CodeAgent(
            model=self.qwen_model,
            tools=[WebSearchTool(), VisitWebpageTool(), WikipediaSearchTool()],
            name="web_agent",
            description=(
                "You are a web browsing agent. Whenever the given {task} involves browsing "
                "the web or a specific website such as Wikipedia or YouTube, you will use "
                "the provided tools. For web-based factual and retrieval tasks, be as precise and source-reliable as possible."
            ),
            additional_authorized_imports=[
                "markdownify",
                "json",
                "requests",
                "urllib.request",
                "urllib.parse",
                # Import name of the ``wikipedia-api`` distribution; the
                # hyphenated pip name can never match an ``import`` statement.
                "wikipediaapi",
            ],
            verbosity_level=0,
            max_steps=10,
        )

        # NOTE(review): the description below advertises
        # ``audio_transcribe_tool`` and ``object_detection_tool``, but neither
        # is registered in ``tools`` (their definitions are commented out at
        # module level). Text kept unchanged; revisit when the tools return.
        self.info_agent = CodeAgent(
            model=self.qwen_model,
            tools=[PythonInterpreterTool()],
            name="info_agent",
            description=(
                "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
                "You can also analyze images, videos and audio using available tools such as audio_transcribe_tool and object_detection_tool when needed. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
                "For image, video, audio tasks, use pytesseract, PIL, chess, or audio_transcribe_tool and object_detection_tool as required."
            ),
            additional_authorized_imports=[
                "numpy",
                "math",
                "pytesseract",
                "PIL",
                "chess",
                "bs4",
                # NOTE(review): "BeautifulSoup" is a class inside ``bs4``, not
                # a module name; entry kept for backward compatibility.
                "BeautifulSoup",
                "openpyxl",
                "lxml",
            ],
        )

        self.manager_agent = CodeAgent(
            model=self.deepseek_model,
            tools=[FinalAnswerTool()],
            managed_agents=[self.web_agent, self.info_agent],
            name="manager_agent",
            description=(
                "You are the manager agent. **Respond with a single python code-block only**. "
                "Inside that block you must call the other agents via `agent(name)(task)` "
                "and end with `final_answer({...})`. **No natural language outside the block**"
            ),
            additional_authorized_imports=[
                "json",
                "pandas",
                "numpy",
            ],
            planning_interval=6,
            verbosity_level=2,
            # final_answer_checks=[self.check_reasoning],
            max_steps=4,
        )

    # Disabled verification hook, kept for reference. Re-enable together with
    # the ``final_answer_checks`` argument above.
    #
    # def check_reasoning(self, final_answer, agent_memory):
    #     model = self.gemini_model
    #     verification_prompt = (
    #         f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. "
    #         f"The proposed final answer is: {final_answer}. "
    #         "Please check that the reasoning process is correct: do they correctly answer the given task? "
    #         "First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not."
    #     )
    #     output = model(verification_prompt)
    #     print("Feedback: ", output)
    #     if "FAIL" in output:
    #         raise Exception(output)
    #     return True

    def __call__(self, task: str) -> str:
        """Run the manager agent on ``task`` and return its final answer text.

        Args:
            task: The user task, passed to the manager agent unchanged.

        Returns:
            The manager agent's final answer. Non-string answers (for example
            a dict handed to ``final_answer``) are converted with ``str``.
        """
        # ``run`` is the top-level entry point. Calling the agent object
        # directly is the managed-agent path, which rewrites the task with the
        # managed-agent prompt and wraps the answer in a report for a parent.
        result = self.manager_agent.run(task)
        return result if isinstance(result, str) else str(result)