feature: warm-up cache

AlexsanderHamir · AlexsanderHamir · commit c930476753a1 · 2025-09-25T15:46:59.000-07:00
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
@@ -1741,6 +1741,10 @@ class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
         description="[DEPRECATED] Use 'user_header_mappings' instead. When set, the header value is treated as the end user id unless overridden by user_header_mappings.",
     )
     user_header_mappings: Optional[List[UserHeaderMapping]] = None
+    preload_users_limit: Optional[int] = Field(
+        default=0,
+        description="Maximum number of users to pre-load into cache on startup. Set to 0 to disable preloading. Defaults to 0.",
+    )
 
 
 class ConfigYAML(LiteLLMPydanticObjectBase):
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
@@ -652,6 +652,30 @@ async def proxy_startup_event(app: FastAPI):
             litellm_proxy_budget_name=litellm_proxy_admin_name
         )
 
+    ### PRELOAD USERS INTO CACHE ###
+    if prisma_client is not None and general_settings is not None:
+        # The setting is Optional[int]; a None value would make `> 0` raise, so treat it as 0.
+        preload_limit = general_settings.get("preload_users_limit", 0) or 0
+        if preload_limit > 0:
+            from litellm.proxy.utils import preload_users_into_cache
+            verbose_proxy_logger.info(
+                f"Starting background user preload into cache: limit={preload_limit}"
+            )
+
+            async def _preload_users_background():
+                # Best-effort: log and swallow errors so a failed preload never fails startup.
+                try:
+                    await preload_users_into_cache(
+                        prisma_client=prisma_client,  # type: ignore
+                        user_api_key_cache=user_api_key_cache,
+                        limit=preload_limit,
+                    )
+                    verbose_proxy_logger.info("User preload background task completed")
+                except Exception as e:
+                    verbose_proxy_logger.error(f"Failed to preload users in background: {e}")
+            # NOTE(review): task handle is discarded; asyncio keeps only weak refs to tasks.
+            asyncio.create_task(_preload_users_background())
+
     ### START BATCH WRITING DB + CHECKING NEW MODELS###
     if prisma_client is not None:
         await ProxyStartupEvent.initialize_scheduled_background_jobs(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
@@ -2074,7 +2074,9 @@ async def get_data(  # noqa: PLR0915
             elif table_name == "enduser" and budget_id_list is not None:
                 if query_type == "find_all":
                     response = await self.db.litellm_endusertable.find_many(
-                        where={"budget_id": {"in": budget_id_list}}
+                        where={"budget_id": {"in": budget_id_list}},
+                        order={"litellm_budget_table": {"created_at": "desc"}},
+                        include={"litellm_budget_table": True}
                     )
                     return response
             elif table_name == "team":
@@ -3025,6 +3027,79 @@ async def _cache_user_row(user_id: str, cache: DualCache, db: PrismaClient):
     return
 
 
+async def preload_users_into_cache(
+    prisma_client: PrismaClient,
+    user_api_key_cache: DualCache,
+    limit: int = 1000,
+) -> Dict[str, Any]:
+    """
+    Pre-load end-user rows from the database into the cache on startup.
+
+    Rows are stored under "end_user_id:<user_id>"; keys already cached are left as-is.
+    Errors are logged and reported in the result instead of being raised.
+
+    Args:
+        prisma_client: Database client
+        user_api_key_cache: Cache instance to store users
+        limit: Maximum number of rows to fetch; values <= 0 preload nothing
+    Returns:
+        Dict with "preloaded_count", "failed_count", "total_users" (rows fetched) and
+        "limit"; if the preload itself fails, a dict with "error" and "preloaded_count".
+    """
+    try:
+        if limit <= 0:
+            # Nothing to do; also avoids handing a zero/negative `take` to the query.
+            return {"preloaded_count": 0, "failed_count": 0, "total_users": 0, "limit": limit}
+
+        verbose_proxy_logger.info(f"Starting user preload: limit={limit}")
+        # Ordered by the related budget row's created_at, newest first.
+        users = await prisma_client.db.litellm_endusertable.find_many(
+            take=limit,
+            order={"litellm_budget_table": {"created_at": "desc"}},
+            include={"litellm_budget_table": True},
+        )
+
+        if not users:
+            verbose_proxy_logger.info("No users found in database to preload")
+            return {"preloaded_count": 0, "failed_count": 0, "total_users": 0, "limit": limit}
+
+        preloaded_count = 0
+        failed_count = 0
+
+        for user in users:
+            try:
+                cache_key = "end_user_id:{}".format(user.user_id)
+                # Never overwrite an entry that is already cached.
+                if user_api_key_cache.get_cache(key=cache_key) is not None:
+                    continue
+
+                user_api_key_cache.set_cache(key=cache_key, value=user.dict())
+                preloaded_count += 1
+                if preloaded_count % 100 == 0:
+                    verbose_proxy_logger.debug(f"Preloaded {preloaded_count} users...")
+            except Exception as e:
+                # One bad row must not abort the rest of the preload.
+                failed_count += 1
+                verbose_proxy_logger.warning(f"Failed to preload user {user.user_id}: {e}")
+
+        result = {
+            "preloaded_count": preloaded_count,
+            "failed_count": failed_count,
+            "total_users": len(users),
+            "limit": limit,
+        }
+        verbose_proxy_logger.info(
+            f"User preload completed: {preloaded_count} users cached, "
+            f"{failed_count} failed, {len(users)} users fetched from database"
+        )
+        return result
+    except Exception as e:
+        # Best-effort: report the failure to the caller instead of raising.
+        error_msg = f"Error during user preload: {e}"
+        verbose_proxy_logger.error(error_msg)
+        return {"error": error_msg, "preloaded_count": 0}
+
+
 async def send_email(
     receiver_email: Optional[str] = None,
     subject: Optional[str] = None,

Original file line number	Diff line number	Diff line change
`@@ -1741,6 +1741,10 @@ class ConfigGeneralSettings(LiteLLMPydanticObjectBase):`
`1741`	`1741`	`description="[DEPRECATED] Use 'user_header_mappings' instead. When set, the header value is treated as the end user id unless overridden by user_header_mappings.",`
`1742`	`1742`	`)`
`1743`	`1743`	`user_header_mappings: Optional[List[UserHeaderMapping]] = None`
	`1744`	`+ preload_users_limit: Optional[int] = Field(`
	`1745`	`+ default=0,`
	`1746`	`+ description="Maximum number of users to pre-load into cache on startup. Set to 0 to disable preloading. Defaults to 0."`
	`1747`	`+ )`
`1744`	`1748`
`1745`	`1749`
`1746`	`1750`	`class ConfigYAML(LiteLLMPydanticObjectBase):`