diff --git a/README.md b/README.md
index 71bcac3..1e3a863 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,6 @@ Build your AI agents in three lines of code!
 * Three lines of code setup
 * Simple Agent Definition
 * Fast Responses
-* Multi-Vendor Support
 * Solana Integration
 * Multi-Agent Swarm
 * Multi-Modal (Images & Audio & Text)
@@ -44,7 +43,6 @@ Build your AI agents in three lines of code!
 * Easy three lines of code setup
 * Simple agent definition using JSON
 * Fast AI responses
-* Multi-vendor support including OpenAI, Grok, and Gemini AI services
 * Solana Integration
 * MCP tool usage with first-class support for [Zapier](https://zapier.com/mcp)
 * Integrated observability and tracing via [Pydantic Logfire](https://pydantic.dev/logfire)
@@ -79,25 +77,10 @@ Build your AI agents in three lines of code!
 ### AI Models Used
 
 **OpenAI**
-* [gpt-4.1](https://platform.openai.com/docs/models/gpt-4.1) (agent - can be overridden)
-* [gpt-4.1-nano](https://platform.openai.com/docs/models/gpt-4.1-nano) (router - can be overridden)
+* [gpt-4.1-nano](https://platform.openai.com/docs/models/gpt-4.1-nano) (agent & router)
 * [text-embedding-3-large](https://platform.openai.com/docs/models/text-embedding-3-large) (embedding)
 * [tts-1](https://platform.openai.com/docs/models/tts-1) (audio TTS)
 * [gpt-4o-mini-transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) (audio transcription)
-* [gpt-image-1](https://platform.openai.com/docs/models/gpt-image-1) (image generation - can be overridden)
-* [gpt-4o-mini-search-preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview) (Internet search)
-
-**Grok**
-* [grok-3-fast](https://x.ai/api#pricing) (agent - optional)
-* [grok-3-mini-fast](https://x.ai/api#pricing) (router - optional)
-* [grok-2-image](https://x.ai/api#pricing) (image generation - optional)
-
-**Gemini**
-* [gemini-2.5-flash-preview-04-17](https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview) (agent & router - optional)
-* [imagen-3.0-generate-002](https://ai.google.dev/gemini-api/docs/models#imagen-3) (image generation - optional)
-
-**Ollama**
-* [gemma:4b-it-qat](https://ollama.com/library/gemma3) - (agent & router - optional)
 
 ## Installation
 
@@ -432,36 +415,6 @@ config = {
 }
 ```
 
-### Grok
-
-```python
-config = {
-    "grok": {
-        "api_key": "your-grok-api-key",
-    },
-}
-```
-
-### Gemini
-
-```python
-config = {
-    "gemini": {
-        "api_key": "your-gemini-api-key",
-    },
-}
-```
-
-### Ollama
-
-```python
-config = {
-    "ollama": {
-        "api_key": "use-this-key-1010"
-    },
-}
-```
-
 ### Knowledge Base
 
 The Knowledge Base (KB) is meant to store text values and/or PDFs (extracts text) - can handle very large PDFs.
diff --git a/docs/index.rst b/docs/index.rst
index bb8d22f..043a7ad 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -366,31 +366,6 @@ Observability and Tracing - Optional
         },
     }
 
-
-Grok - Optional
-~~~~~~~~~~~~~~~~
-
-.. code-block:: python
-
-    config = {
-        "grok": {
-            "api_key": "your-grok-api-key",
-        },
-    }
-
-
-Gemini - Optional
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. code-block:: python
-
-    config = {
-        "gemini": {
-            "api_key": "your-gemini-api-key",
-        },
-    }
-
-
 Knowledge Base - Optional
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/pyproject.toml b/pyproject.toml
index 341975b..62ce144 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "solana-agent"
-version = "29.3.0"
+version = "30.0.0"
 description = "AI Agents for Solana"
 authors = ["Bevan Hunt "]
 license = "MIT"
diff --git a/solana_agent/adapters/openai_adapter.py b/solana_agent/adapters/openai_adapter.py
index 5579789..fb76079 100644
--- a/solana_agent/adapters/openai_adapter.py
+++ b/solana_agent/adapters/openai_adapter.py
@@ -33,8 +33,8 @@
 
 T = TypeVar("T", bound=BaseModel)
 
-DEFAULT_CHAT_MODEL = "gpt-4.1"
-DEFAULT_VISION_MODEL = "gpt-4.1"
+DEFAULT_CHAT_MODEL = "gpt-4.1-nano"
+DEFAULT_VISION_MODEL = "gpt-4.1-nano"
 DEFAULT_PARSE_MODEL = "gpt-4.1-nano"
 DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"
 DEFAULT_EMBEDDING_DIMENSIONS = 3072
@@ -163,9 +163,8 @@ async def generate_text(
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         model: Optional[str] = None,
-        functions: Optional[List[Dict[str, Any]]] = None,
-        function_call: Optional[Union[str, Dict[str, Any]]] = None,
-    ) -> Any:  # pragma: no cover
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> str:  # pragma: no cover
         """Generate text or function call from OpenAI models."""
         messages = []
         if system_prompt:
@@ -176,10 +175,8 @@ async def generate_text(
             "messages": messages,
             "model": model or self.text_model,
         }
-        if functions:
-            request_params["functions"] = functions
-        if function_call:
-            request_params["function_call"] = function_call
+        if tools:
+            request_params["tools"] = tools
 
         if api_key and base_url:
             client = AsyncOpenAI(api_key=api_key, base_url=base_url)
@@ -410,8 +407,7 @@ async def parse_structured_output(
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         model: Optional[str] = None,
-        functions: Optional[List[Dict[str, Any]]] = None,
-        function_call: Optional[Union[str, Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> T:  # pragma: no cover
         """Generate structured output using Pydantic model parsing with Instructor."""
 
@@ -439,10 +435,8 @@ async def parse_structured_output(
             "response_model": model_class,
             "max_retries": 2,  # Automatically retry on validation errors
         }
-        if functions:
-            create_args["tools"] = functions
-        if function_call:
-            create_args["function_call"] = function_call
+        if tools:
+            create_args["tools"] = tools
 
         response = await patched_client.chat.completions.create(**create_args)
         return response
diff --git a/solana_agent/factories/agent_factory.py b/solana_agent/factories/agent_factory.py
index de74b31..91ddfe9 100644
--- a/solana_agent/factories/agent_factory.py
+++ b/solana_agent/factories/agent_factory.py
@@ -195,27 +195,6 @@ def create_from_config(config: Dict[str, Any]) -> QueryService:
                 model="gemini-2.5-flash-preview-05-20",
             )  # pragma: no cover
 
-        elif "grok" in config and "api_key" in config["grok"]:
-            # Create primary services
-            agent_service = AgentService(
-                llm_provider=llm_adapter,
-                business_mission=business_mission,
-                config=config,
-                api_key=config["grok"]["api_key"],
-                base_url="https://api.x.ai/v1",
-                model="grok-3-fast",
-                output_guardrails=output_guardrails,
-            )  # pragma: no cover
-
-            # Create routing service
-            routing_service = RoutingService(
-                llm_provider=llm_adapter,
-                agent_service=agent_service,
-                api_key=config["gemini"]["api_key"],
-                base_url="https://api.x.ai/v1",
-                model="grok-3-mini-fast",
-            )  # pragma: no cover
-
         elif "ollama" in config and "api_key" in config["ollama"]:
             # Create primary services
             agent_service = AgentService(
diff --git a/solana_agent/interfaces/providers/llm.py b/solana_agent/interfaces/providers/llm.py
index 1e38e46..065a271 100644
--- a/solana_agent/interfaces/providers/llm.py
+++ b/solana_agent/interfaces/providers/llm.py
@@ -28,8 +28,7 @@ async def generate_text(
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         model: Optional[str] = None,
-        functions: Optional[List[Dict[str, Any]]] = None,
-        function_call: Optional[Union[str, Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Any:
         """Generate text from the language model."""
         pass
@@ -43,8 +42,7 @@ async def parse_structured_output(
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         model: Optional[str] = None,
-        functions: Optional[List[Dict[str, Any]]] = None,
-        function_call: Optional[Union[str, Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> T:
         """Generate structured output using a specific model class."""
         pass
@@ -106,6 +104,7 @@ async def generate_text_with_images(
         images: List[Union[str, bytes]],
         system_prompt: str = "",
         detail: Literal["low", "high", "auto"] = "auto",
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> str:
         """Generate text from the language model using images."""
         pass
diff --git a/solana_agent/services/agent.py b/solana_agent/services/agent.py
index bd0a293..aca7f06 100644
--- a/solana_agent/services/agent.py
+++ b/solana_agent/services/agent.py
@@ -267,11 +267,15 @@ async def generate_response(
             full_prompt += f"USER IDENTIFIER: {user_id}"
 
         # Get OpenAI function schemas for this agent's tools
-        functions = [
+        tools = [
             {
-                "name": tool["name"],
-                "description": tool.get("description", ""),
-                "parameters": tool.get("parameters", {}),
+                "type": "function",
+                "function": {
+                    "name": tool["name"],
+                    "description": tool.get("description", ""),
+                    "parameters": tool.get("parameters", {}),
+                    "strict": True,
+                },
             }
             for tool in self.get_agent_tools(agent_name)
         ]
@@ -286,8 +290,7 @@ async def generate_response(
                 api_key=self.api_key,
                 base_url=self.base_url,
                 model=self.model,
-                functions=functions if functions else None,
-                function_call="auto" if functions else None,
+                tools=tools if tools else None,
             )
             yield model_instance
             return
@@ -295,15 +298,25 @@ async def generate_response(
         # --- Streaming text/audio with tool support (as before) ---
         response_text = ""
         while True:
-            response = await self.llm_provider.generate_text(
-                prompt=full_prompt,
-                system_prompt=system_prompt,
-                functions=functions if functions else None,
-                function_call="auto" if functions else None,
-                api_key=self.api_key,
-                base_url=self.base_url,
-                model=self.model,
-            )
+            if not images:
+                response = await self.llm_provider.generate_text(
+                    prompt=full_prompt,
+                    system_prompt=system_prompt,
+                    api_key=self.api_key,
+                    base_url=self.base_url,
+                    model=self.model,
+                    tools=tools if tools else None,
+                )
+            else:
+                response = await self.llm_provider.generate_text_with_images(
+                    prompt=full_prompt,
+                    system_prompt=system_prompt,
+                    api_key=self.api_key,
+                    base_url=self.base_url,
+                    model=self.model,
+                    tools=tools if tools else None,
+                    images=images,
+                )
             if (
                 not response
                 or not hasattr(response, "choices")
@@ -316,25 +329,24 @@ async def generate_response(
             choice = response.choices[0]
             message = getattr(choice, "message", choice)
 
-            # If the model wants to call a function/tool
-            if hasattr(message, "function_call") and message.function_call:
-                function_name = message.function_call.name
-                arguments = json.loads(message.function_call.arguments)
-                logger.info(
-                    f"Model requested tool '{function_name}' with args: {arguments}"
-                )
-
-                # Execute the tool (async)
-                tool_result = await self.execute_tool(
-                    agent_name, function_name, arguments
-                )
-
-                # Add the tool result to the prompt for the next round
-                full_prompt += (
-                    f"\n\nTool '{function_name}' was called with arguments {arguments}.\n"
-                    f"Result: {tool_result}\n"
-                )
-                continue  # Loop again, LLM will see tool result and may call another tool or finish
+            if hasattr(message, "tool_calls") and message.tool_calls:
+                for tool_call in message.tool_calls:
+                    if tool_call.type == "function":
+                        function_name = tool_call.function.name
+                        arguments = json.loads(tool_call.function.arguments)
+                        logger.info(
+                            f"Model requested tool '{function_name}' with args: {arguments}"
+                        )
+                        # Execute the tool (async)
+                        tool_result = await self.execute_tool(
+                            agent_name, function_name, arguments
+                        )
+                        # Add the tool result to the prompt for the next round
+                        full_prompt += (
+                            f"\n\nTool '{function_name}' was called with arguments {arguments}.\n"
+                            f"Result: {tool_result}\n"
+                        )
+                continue
 
             # Otherwise, it's a normal message (final answer)
             response_text = message.content
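
Example (illustrative sketch, not part of the diff): the round trip below shows the Chat Completions "tools" wire format and "message.tool_calls" handling that this changeset migrates to, replacing the deprecated "functions"/"function_call" parameters. The get_weather tool, its schema, and the prompt are hypothetical; the client calls follow the OpenAI Python SDK (v1+).

import asyncio
import json

from openai import AsyncOpenAI

# New-style tool schema: the old flat {"name", "description", "parameters"}
# entries are wrapped under {"type": "function", "function": {...}}.
# "get_weather" is a made-up example tool, not one shipped by solana-agent.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
                "additionalProperties": False,
            },
            "strict": True,
        },
    }
]


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    response = await client.chat.completions.create(
        model="gpt-4.1-nano",
        messages=[{"role": "user", "content": "What's the weather in Lisbon?"}],
        tools=tools,  # replaces the deprecated functions=/function_call= params
    )

    message = response.choices[0].message
    # Requested calls now arrive on message.tool_calls (a list), not on the
    # deprecated message.function_call attribute.
    if message.tool_calls:
        for tool_call in message.tool_calls:
            if tool_call.type == "function":
                args = json.loads(tool_call.function.arguments)
                print(f"model wants {tool_call.function.name}({args})")
    else:
        print(message.content)


asyncio.run(main())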