diff --git a/docs/components/llms/models/siliconflow.mdx b/docs/components/llms/models/siliconflow.mdx
new file mode 100644
index 0000000000..448e2fa1dd
--- /dev/null
+++ b/docs/components/llms/models/siliconflow.mdx
@@ -0,0 +1,97 @@
+---
+title: SiliconFlow
+---
+
+[SiliconFlow](https://siliconflow.com/) is an AI inference platform that provides access to a wide range of open-source LLMs, including DeepSeek, Qwen, GLM, and more.
+
+To use LLMs from SiliconFlow, go to their [platform](https://siliconflow.com/) and obtain an API key. Set it as the `SILICONFLOW_API_KEY` environment variable and use the model as shown in the example below.
+
+## Usage
+
+<CodeGroup>
+```python Python
+import os
+from mem0 import Memory
+
+os.environ["OPENAI_API_KEY"] = "your-api-key"  # used for the embedding model
+os.environ["SILICONFLOW_API_KEY"] = "your-api-key"
+
+config = {
+    "llm": {
+        "provider": "siliconflow",
+        "config": {
+            "model": "deepseek-ai/DeepSeek-V3",
+            "temperature": 0.1,
+            "max_tokens": 2000,
+        }
+    }
+}
+
+m = Memory.from_config(config)
+messages = [
+    {"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"},
+    {"role": "assistant", "content": "How about thriller movies? They can be quite engaging."},
+    {"role": "user", "content": "I'm not a big fan of thriller movies but I love sci-fi movies."},
+    {"role": "assistant", "content": "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future."}
+]
+m.add(messages, user_id="alice", metadata={"category": "movies"})
+```
+
+```typescript TypeScript
+import { Memory } from 'mem0ai/oss';
+
+const config = {
+  llm: {
+    provider: 'siliconflow',
+    config: {
+      apiKey: process.env.SILICONFLOW_API_KEY || '',
+      model: 'deepseek-ai/DeepSeek-V3',
+      temperature: 0.1,
+      maxTokens: 2000,
+    },
+  },
+};
+
+const memory = new Memory(config);
+const messages = [
+  { role: "user", content: "I'm planning to watch a movie tonight. Any recommendations?" },
+  { role: "assistant", content: "How about thriller movies? They can be quite engaging." },
+  { role: "user", content: "I'm not a big fan of thriller movies but I love sci-fi movies." },
+  { role: "assistant", content: "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future." }
+];
+await memory.add(messages, { userId: "alice", metadata: { category: "movies" } });
+```
+</CodeGroup>
+
+## Custom Base URL
+
+If you're using a different region (e.g., China), you can specify a custom base URL:
+
+```python
+config = {
+    "llm": {
+        "provider": "siliconflow",
+        "config": {
+            "model": "deepseek-ai/DeepSeek-V3",
+            "base_url": "https://api.siliconflow.cn/v1",  # for the China region
+            "temperature": 0.1,
+            "max_tokens": 2000,
+        }
+    }
+}
+```
+
+## Supported Models
+
+SiliconFlow supports various models, including:
+- deepseek-ai/DeepSeek-V3
+- deepseek-ai/DeepSeek-R1
+- Qwen/Qwen2.5-72B-Instruct
+- THUDM/GLM-4-9B
+- meta-llama/Meta-Llama-3.1-8B-Instruct
+
+And many more. Check the [SiliconFlow documentation](https://docs.siliconflow.com/) for the full list.
+
+## Config
+
+All available parameters for the `siliconflow` config are present in [Master List of All Params in Config](../config).
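Reviewer note (not part of the diff): the Python example above needs `OPENAI_API_KEY` only because the default embedder is OpenAI. A minimal sketch of making that pairing explicit via mem0's standard `embedder` config block, assuming the embedder defaults are unchanged by this PR:

```python
# Sketch only: SiliconFlow for the LLM, an explicitly configured OpenAI
# embedder for vectors, instead of relying on the implicit default embedder.
import os
from mem0 import Memory

os.environ["SILICONFLOW_API_KEY"] = "your-api-key"
os.environ["OPENAI_API_KEY"] = "your-api-key"  # embeddings only

config = {
    "llm": {
        "provider": "siliconflow",
        "config": {"model": "deepseek-ai/DeepSeek-V3", "temperature": 0.1, "max_tokens": 2000},
    },
    "embedder": {
        "provider": "openai",
        "config": {"model": "text-embedding-3-small"},
    },
}

m = Memory.from_config(config)
m.add("I love sci-fi movies.", user_id="alice")
```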
diff --git a/mem0/configs/llms/siliconflow.py b/mem0/configs/llms/siliconflow.py
new file mode 100644
index 0000000000..513c5faa2d
--- /dev/null
+++ b/mem0/configs/llms/siliconflow.py
@@ -0,0 +1,62 @@
+from typing import Any, Callable, Optional
+
+from mem0.configs.llms.base import BaseLlmConfig
+
+
+class SiliconFlowConfig(BaseLlmConfig):
+    """
+    Configuration class for SiliconFlow-specific parameters.
+    Inherits from BaseLlmConfig and adds SiliconFlow-specific settings.
+    """
+
+    def __init__(
+        self,
+        # Base parameters
+        model: Optional[str] = None,
+        temperature: float = 0.1,
+        api_key: Optional[str] = None,
+        max_tokens: int = 2000,
+        top_p: float = 0.1,
+        top_k: int = 1,
+        enable_vision: bool = False,
+        vision_details: Optional[str] = "auto",
+        http_client_proxies: Optional[dict] = None,
+        # SiliconFlow-specific parameters
+        base_url: Optional[str] = None,
+        # Response monitoring callback
+        response_callback: Optional[Callable[[Any, dict, dict], None]] = None,
+    ):
+        """
+        Initialize SiliconFlow configuration.
+
+        Args:
+            model: SiliconFlow model to use, defaults to "Qwen/Qwen2.5-7B-Instruct"
+            temperature: Controls randomness, defaults to 0.1
+            api_key: SiliconFlow API key, defaults to None
+            max_tokens: Maximum tokens to generate, defaults to 2000
+            top_p: Nucleus sampling parameter, defaults to 0.1
+            top_k: Top-k sampling parameter, defaults to 1
+            enable_vision: Enable vision capabilities, defaults to False
+            vision_details: Vision detail level, defaults to "auto"
+            http_client_proxies: HTTP client proxy settings, defaults to None
+            base_url: SiliconFlow API base URL, defaults to "https://api.siliconflow.com/v1"
+            response_callback: Optional callback for monitoring LLM responses.
+        """
+        # Initialize base parameters
+        super().__init__(
+            model=model,
+            temperature=temperature,
+            api_key=api_key,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            top_k=top_k,
+            enable_vision=enable_vision,
+            vision_details=vision_details,
+            http_client_proxies=http_client_proxies,
+        )
+
+        # SiliconFlow-specific parameters
+        self.base_url = base_url
+
+        # Response monitoring
+        self.response_callback = response_callback
diff --git a/mem0/utils/factory.py b/mem0/utils/factory.py
index ab3fc77a3c..acaacc9082 100644
--- a/mem0/utils/factory.py
+++ b/mem0/utils/factory.py
@@ -9,6 +9,7 @@
 from mem0.configs.llms.lmstudio import LMStudioConfig
 from mem0.configs.llms.ollama import OllamaConfig
 from mem0.configs.llms.openai import OpenAIConfig
+from mem0.configs.llms.siliconflow import SiliconFlowConfig
 from mem0.configs.llms.vllm import VllmConfig
 from mem0.configs.rerankers.base import BaseRerankerConfig
 from mem0.configs.rerankers.cohere import CohereRerankerConfig
@@ -36,6 +37,7 @@ class LlmFactory:
         "ollama": ("mem0.llms.ollama.OllamaLLM", OllamaConfig),
         "openai": ("mem0.llms.openai.OpenAILLM", OpenAIConfig),
         "groq": ("mem0.llms.groq.GroqLLM", BaseLlmConfig),
+        "siliconflow": ("mem0.llms.siliconflow.SiliconFlowLLM", SiliconFlowConfig),
         "together": ("mem0.llms.together.TogetherLLM", BaseLlmConfig),
         "aws_bedrock": ("mem0.llms.aws_bedrock.AWSBedrockLLM", BaseLlmConfig),
         "litellm": ("mem0.llms.litellm.LiteLLM", BaseLlmConfig),
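Reviewer note (not part of the diff): with the `"siliconflow"` entry registered above, the provider can also be exercised directly through the factory. A minimal sketch, assuming `mem0.llms.siliconflow.SiliconFlowLLM` (referenced by the mapping but not included in this diff) follows the same `LlmFactory.create` / `generate_response` interface as the other providers:

```python
# Sketch only: resolve the new "siliconflow" entry through LlmFactory.
# The config keys mirror SiliconFlowConfig as added in this diff.
from mem0.utils.factory import LlmFactory

llm = LlmFactory.create(
    "siliconflow",
    {
        "model": "deepseek-ai/DeepSeek-V3",
        "api_key": "your-api-key",                    # or set SILICONFLOW_API_KEY
        "base_url": "https://api.siliconflow.cn/v1",  # optional; China region
        "temperature": 0.1,
        "max_tokens": 2000,
    },
)
reply = llm.generate_response(messages=[{"role": "user", "content": "Hello!"}])
print(reply)
```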
diff --git a/openmemory/api/app/utils/categorization.py b/openmemory/api/app/utils/categorization.py
index e20c400526..32cb7ca678 100644
--- a/openmemory/api/app/utils/categorization.py
+++ b/openmemory/api/app/utils/categorization.py
@@ -1,6 +1,9 @@
 import logging
-from typing import List
+import os
+from typing import List, Optional
 
+from app.database import SessionLocal
+from app.models import Config as ConfigModel
 from app.utils.prompts import MEMORY_CATEGORIZATION_PROMPT
 from dotenv import load_dotenv
 from openai import OpenAI
@@ -8,16 +11,64 @@
 from tenacity import retry, stop_after_attempt, wait_exponential
 
 load_dotenv()
-openai_client = OpenAI()
 
 
 class MemoryCategories(BaseModel):
     categories: List[str]
 
 
+def get_llm_config():
+    """Get LLM configuration from database or use defaults."""
+    try:
+        db = SessionLocal()
+        db_config = db.query(ConfigModel).filter(ConfigModel.key == "main").first()
+
+        if db_config and "mem0" in db_config.value and "llm" in db_config.value["mem0"]:
+            llm_config = db_config.value["mem0"]["llm"]
+            db.close()
+            return llm_config
+
+        db.close()
+    except Exception as e:
+        logging.warning(f"Failed to load LLM config from database: {e}")
+
+    # Default configuration
+    return {
+        "provider": "openai",
+        "config": {
+            "model": "gpt-4o-mini",
+            "api_key": os.getenv("OPENAI_API_KEY")
+        }
+    }
+
+
+def parse_env_value(value):
+    """Parse environment variable references in config values."""
+    if isinstance(value, str) and value.startswith("env:"):
+        env_var = value.split(":", 1)[1]
+        return os.getenv(env_var)
+    return value
+
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=15))
 def get_categories_for_memory(memory: str) -> List[str]:
     try:
+        # Get LLM configuration
+        llm_config = get_llm_config()
+        config = llm_config.get("config", {})
+
+        # Parse environment variables
+        api_key = parse_env_value(config.get("api_key", os.getenv("OPENAI_API_KEY")))
+        model = config.get("model", "gpt-4o-mini")
+        base_url = parse_env_value(config.get("openai_base_url")) if "openai_base_url" in config else None
+
+        # Create OpenAI client with configured settings
+        client_kwargs = {"api_key": api_key}
+        if base_url:
+            client_kwargs["base_url"] = base_url
+
+        openai_client = OpenAI(**client_kwargs)
+
         messages = [
             {"role": "system", "content": MEMORY_CATEGORIZATION_PROMPT},
             {"role": "user", "content": memory}
@@ -25,7 +76,7 @@ def get_categories_for_memory(memory: str) -> List[str]:
 
         # Let OpenAI handle the pydantic parsing directly
         completion = openai_client.beta.chat.completions.parse(
-            model="gpt-4o-mini",
+            model=model,
             messages=messages,
             response_format=MemoryCategories,
             temperature=0
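Reviewer note (not part of the diff): the `env:` convention handled by `parse_env_value` lets the config stored under the `"main"` key reference environment variables instead of embedding secrets. A minimal standalone sketch of that resolution step, re-implemented here for illustration; `stored_config` is a hypothetical stand-in for the `config` dict returned by `get_llm_config`:

```python
import os


def parse_env_value(value):
    """Resolve "env:VAR_NAME" references to the value of VAR_NAME."""
    if isinstance(value, str) and value.startswith("env:"):
        return os.getenv(value.split(":", 1)[1])
    return value


os.environ["OPENAI_API_KEY"] = "sk-example"

# Hypothetical config as it might be stored in the database row.
stored_config = {"model": "gpt-4o-mini", "api_key": "env:OPENAI_API_KEY"}
resolved = {key: parse_env_value(val) for key, val in stored_config.items()}

print(resolved["api_key"])  # -> "sk-example"
```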