Skip to content

Commit c930476

Browse files
feature: warm-up cache
1 parent 31e38ef commit c930476

File tree

3 files changed

+104
-1
lines changed

3 files changed

+104
-1
lines changed

litellm/proxy/_types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,6 +1741,10 @@ class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
17411741
description="[DEPRECATED] Use 'user_header_mappings' instead. When set, the header value is treated as the end user id unless overridden by user_header_mappings.",
17421742
)
17431743
user_header_mappings: Optional[List[UserHeaderMapping]] = None
1744+
preload_users_limit: Optional[int] = Field(
1745+
default=100,
1746+
description="Maximum number of users to pre-load into cache on startup. Set to 0 to disable preloading. Defaults to 0."
1747+
)
17441748

17451749

17461750
class ConfigYAML(LiteLLMPydanticObjectBase):

litellm/proxy/proxy_server.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,30 @@ async def proxy_startup_event(app: FastAPI):
652652
litellm_proxy_budget_name=litellm_proxy_admin_name
653653
)
654654

655+
### PRELOAD USERS INTO CACHE ###
656+
if prisma_client is not None and general_settings is not None:
657+
preload_limit = general_settings.get("preload_users_limit", 0)
658+
if preload_limit > 0:
659+
from litellm.proxy.utils import preload_users_into_cache
660+
verbose_proxy_logger.info(
661+
f"Starting background user preload into cache: limit={preload_limit}"
662+
)
663+
664+
async def _preload_users_background():
665+
try:
666+
await preload_users_into_cache(
667+
prisma_client=prisma_client, # type: ignore
668+
user_api_key_cache=user_api_key_cache,
669+
limit=preload_limit
670+
)
671+
verbose_proxy_logger.info("User preload background task completed")
672+
except Exception as e:
673+
verbose_proxy_logger.error(f"Failed to preload users in background: {e}")
674+
# Don't fail startup if preload fails
675+
pass
676+
677+
asyncio.create_task(_preload_users_background())
678+
655679
### START BATCH WRITING DB + CHECKING NEW MODELS###
656680
if prisma_client is not None:
657681
await ProxyStartupEvent.initialize_scheduled_background_jobs(

litellm/proxy/utils.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2074,7 +2074,9 @@ async def get_data( # noqa: PLR0915
20742074
elif table_name == "enduser" and budget_id_list is not None:
20752075
if query_type == "find_all":
20762076
response = await self.db.litellm_endusertable.find_many(
2077-
where={"budget_id": {"in": budget_id_list}}
2077+
where={"budget_id": {"in": budget_id_list}},
2078+
order={"litellm_budget_table": {"created_at": "desc"}},
2079+
include={"litellm_budget_table": True}
20782080
)
20792081
return response
20802082
elif table_name == "team":
@@ -3025,6 +3027,79 @@ async def _cache_user_row(user_id: str, cache: DualCache, db: PrismaClient):
30253027
return
30263028

30273029

3030+
async def preload_users_into_cache(
    prisma_client: PrismaClient,
    user_api_key_cache: DualCache,
    limit: int = 1000,
) -> Dict[str, Any]:
    """
    Pre-load end users from the database into the cache on startup.

    Fetches up to ``limit`` rows from ``litellm_endusertable`` (budget rows
    included) and stores each one under the key ``end_user_id:<user_id>``,
    leaving keys that are already cached untouched. The function is
    best-effort: failures are logged and reported in the returned dict,
    never raised to the caller.

    Args:
        prisma_client: Database client
        user_api_key_cache: Cache instance to store users
        limit: Maximum number of users to pre-load. Values <= 0 disable the
            preload (no database query is made); values above 10000 are
            capped at 10000.

    Returns:
        Dict with preload statistics. Every return path carries the same
        keys: ``preloaded_count``, ``skipped_count`` (already cached),
        ``failed_count``, ``total_users`` (rows fetched, bounded by the
        limit) and ``limit`` (the effective limit). An ``error`` key is
        added when the preload aborted.
    """
    max_limit = 10000  # upper bound stated in the parameter contract above

    # One result shape for every exit path, so callers never need to probe
    # for missing keys.
    stats: Dict[str, Any] = {
        "preloaded_count": 0,
        "skipped_count": 0,
        "failed_count": 0,
        "total_users": 0,
        "limit": limit,
    }

    try:
        if limit <= 0:
            # A non-positive `take` must never reach the database query.
            verbose_proxy_logger.info(
                f"Skipping user preload: limit={limit} is not positive"
            )
            return stats

        if limit > max_limit:
            verbose_proxy_logger.warning(
                f"User preload limit={limit} exceeds maximum, capping at {max_limit}"
            )
            limit = max_limit
            stats["limit"] = limit

        verbose_proxy_logger.info(f"Starting user preload: limit={limit}")
        # NOTE(review): ordering is by the related budget row's created_at;
        # confirm how rows without a budget are ordered by the DB client.
        users = await prisma_client.db.litellm_endusertable.find_many(
            take=limit,
            order={"litellm_budget_table": {"created_at": "desc"}},
            include={"litellm_budget_table": True},
        )

        if not users:
            verbose_proxy_logger.info("No users found in database to preload")
            return stats

        stats["total_users"] = len(users)

        for user in users:
            try:
                cache_key = "end_user_id:{}".format(user.user_id)

                # Never overwrite an entry that is already cached.
                if user_api_key_cache.get_cache(key=cache_key) is not None:
                    stats["skipped_count"] += 1
                    continue

                user_api_key_cache.set_cache(key=cache_key, value=user.dict())
                stats["preloaded_count"] += 1

                if stats["preloaded_count"] % 100 == 0:
                    verbose_proxy_logger.debug(
                        f"Preloaded {stats['preloaded_count']} users..."
                    )
            except Exception as e:
                # One bad row must not abort the whole preload. getattr keeps
                # the handler itself from raising on a malformed row.
                stats["failed_count"] += 1
                verbose_proxy_logger.warning(
                    f"Failed to preload user {getattr(user, 'user_id', None)}: {e}"
                )

        verbose_proxy_logger.info(
            f"User preload completed: {stats['preloaded_count']} users cached, "
            f"{stats['skipped_count']} already cached, "
            f"{stats['failed_count']} failed, {stats['total_users']} users fetched"
        )
        return stats

    except Exception as e:
        # Top-level boundary: preload is an optimisation, so report the
        # failure in the result instead of propagating it.
        error_msg = f"Error during user preload: {e}"
        verbose_proxy_logger.error(error_msg)
        stats["error"] = error_msg
        return stats
3102+
30283103
async def send_email(
30293104
receiver_email: Optional[str] = None,
30303105
subject: Optional[str] = None,

0 commit comments

Comments
 (0)