12 changes: 12 additions & 0 deletions charts/model-engine/templates/_helpers.tpl
@@ -369,6 +369,18 @@ env:
 - name: CIRCLECI
   value: "true"
 {{- end }}
+{{- if .Values.gunicorn }}
Collaborator:
anecdotally, we found it a lot easier to performance tune pure uvicorn, so we actually migrated most usage of gunicorn back to uvicorn. That being said, won't block your usage of it

+- name: WORKER_TIMEOUT
+  value: {{ .Values.gunicorn.workerTimeout | quote }}
+- name: GUNICORN_TIMEOUT
+  value: {{ .Values.gunicorn.workerTimeout | quote }}
+- name: GUNICORN_GRACEFUL_TIMEOUT
+  value: {{ .Values.gunicorn.gracefulTimeout | quote }}
+- name: GUNICORN_KEEP_ALIVE
+  value: {{ .Values.gunicorn.keepAlive | quote }}
+- name: GUNICORN_WORKER_CLASS
+  value: {{ .Values.gunicorn.workerClass | quote }}
+{{- end }}
 {{- end }}

{{- define "modelEngine.serviceEnvGitTagFromHelmVar" }}
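For reference, a minimal sketch of the uvicorn-only setup the comment above describes, assuming a hypothetical ASGI app path and illustrative flag values (the repo's actual entrypoint may differ):

    # Hypothetical uvicorn equivalent of the gunicorn settings above; the app
    # import string "model_engine_server.api.app:app" and port are assumptions.
    import uvicorn

    if __name__ == "__main__":
        uvicorn.run(
            "model_engine_server.api.app:app",  # assumed ASGI app path
            host="::",                          # matches gunicorn's [::] bind
            port=5000,
            workers=4,                          # process count, like --workers
            timeout_keep_alive=2,               # like --keep-alive
            timeout_graceful_shutdown=120,      # like --graceful-timeout
        )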
132 changes: 132 additions & 0 deletions charts/model-engine/values_onprem.yaml
@@ -0,0 +1,132 @@
+# values_onprem.yaml - On-premises deployment configuration
Collaborator:
I don't think you need to include this file here? I believe SGP maintains their own values.yaml in their own repo somewhere cc @nicolastomeo ?

Author:
ok, removed.

+# This file contains default settings for on-premises deployments without cloud dependencies
+
+# tag [required] is the LLM Engine docker image tag
+tag: latest
+context: onprem
+
+image:
+  gatewayRepository: your-registry/model-engine
+  builderRepository: your-registry/model-engine
+  cacherRepository: your-registry/model-engine
+  forwarderRepository: your-registry/model-engine
+  pullPolicy: Always
+
+secrets:
+  kubernetesDatabaseSecretName: llm-engine-postgres-credentials
+
+db:
+  runDbInitScript: false
+  runDbMigrationScript: false
+
+serviceAccount:
+  annotations:
+    "helm.sh/hook": pre-install,pre-upgrade
+    "helm.sh/hook-weight": "-2"
+  namespaces: []
+
+imageBuilderServiceAccount:
+  create: true
+  annotations: {}
+  namespaces: []
+
+service:
+  type: ClusterIP
+  port: 80
+
+virtualservice:
+  enabled: true
+  annotations: {}
+  hostDomains:
+    - llm-engine.local
+  gateways:
+    - default/internal-gateway
+
+hostDomain:
+  prefix: http://
+
+destinationrule:
+  enabled: true
+  annotations: {}
+
+replicaCount:
+  gateway: 2
+  cacher: 1
+  builder: 1
+
+balloonConfig:
+  reserveHighPriority: true
+
+balloons:
+  - acceleratorName: nvidia-ampere-a10
+    replicaCount: 0
+  - acceleratorName: nvidia-ampere-a100
+    replicaCount: 0
+  - acceleratorName: cpu
+    replicaCount: 0
+  - acceleratorName: nvidia-tesla-t4
+    replicaCount: 0
+  - acceleratorName: nvidia-hopper-h100
+    replicaCount: 0
+    gpuCount: 4
+
+# On-premises specific configuration
+config:
+  values:
+    infra:
+      cloud_provider: onprem
+      k8s_cluster_name: onprem_cluster
+      dns_host_domain: localhost
+      default_region: local
+      ml_account_id: "000000000000"
+      docker_repo_prefix: "your-registry"
+      redis_host: "redis-service"
+      s3_bucket: "local-storage"
+      profile_ml_worker: "default"
+      profile_ml_inference_worker: "default"
+      # On-premises configuration
+      onprem_redis_host: "redis-service"
+      onprem_redis_port: "6379"
+      onprem_redis_password: null
+      # AWS disable configuration
+      disable_aws: true
+      disable_aws_secrets_manager: true
+      # Celery broker configuration
+      disable_sqs_broker: true
+      disable_servicebus_broker: true
+      force_celery_redis: true
+      # DB engine configs
+      db_engine_pool_size: 10
+      db_engine_max_overflow: 10
+      db_engine_echo: false
+      db_engine_echo_pool: false
+      db_engine_disconnect_strategy: "pessimistic"
+
+# Redis configuration for on-premises
+redis:
+  auth: null
+  enableTLS: false
+  enableAuth: false
+  kedaSecretName: ""
+  unsafeSsl: false
+
+# Gunicorn configuration for on-premises
+gunicorn:
+  workerTimeout: 120
+  gracefulTimeout: 120
+  keepAlive: 2
+  workerClass: "model_engine_server.api.worker.LaunchWorker"
+
+# Disable cloud-specific features
+dd_trace_enabled: false
+spellbook:
+  enabled: false
+
+keda:
+  cooldownPeriod: 300
+
+balloonNodeSelector:
+  node-lifecycle: normal
+
+nodeSelector:
+  node-lifecycle: normal
13 changes: 13 additions & 0 deletions model-engine/model_engine_server/common/config.py
@@ -104,6 +104,19 @@ def cache_redis_url(self) -> str:
             creds = get_key_file(self.cache_redis_aws_secret_name)  # Use default role
             return creds["cache-url"]
 
+        # Check if we're in an onprem environment with direct Redis access via config
+        if infra_config().onprem_redis_host:
+            # Onprem Redis configuration
+            redis_host = infra_config().onprem_redis_host
+            redis_port = infra_config().onprem_redis_port
+            redis_password = infra_config().onprem_redis_password
+
+            if redis_password:
+                return f"redis://:{redis_password}@{redis_host}:{redis_port}/0"
+            else:
+                return f"redis://{redis_host}:{redis_port}/0"
+
+        # Azure Redis configuration (existing logic)
         assert self.cache_redis_azure_host and infra_config().cloud_provider == "azure"
         username = os.getenv("AZURE_OBJECT_ID")
         token = DefaultAzureCredential().get_token("https://redis.azure.com/.default")
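The added branch is easy to check in isolation; a self-contained sketch of the same URL logic with hypothetical inputs (the real code reads these values from infra_config()):

    from typing import Optional

    def onprem_redis_url(host: str, port: str, password: Optional[str]) -> str:
        # Mirror of the branch above: embed the password only when one is set
        if password:
            return f"redis://:{password}@{host}:{port}/0"
        return f"redis://{host}:{port}/0"

    assert onprem_redis_url("redis-service", "6379", None) == "redis://redis-service:6379/0"
    assert onprem_redis_url("redis-service", "6379", "s3cret") == "redis://:s3cret@redis-service:6379/0"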
8 changes: 7 additions & 1 deletion model-engine/model_engine_server/core/aws/roles.py
@@ -12,6 +12,7 @@
 from boto3 import Session, client
 from botocore.client import BaseClient
 from model_engine_server.core.loggers import logger_name, make_logger
+from model_engine_server.core.config import infra_config
 
 logger = make_logger(logger_name())

@@ -114,12 +115,17 @@ def assume_role(role_arn: str, role_session_name: Optional[str] = None) -> AwsCr
     )
 
 
-def session(role: Optional[str], session_type: SessionT = Session) -> SessionT:
+def session(role: Optional[str], session_type: SessionT = Session) -> Optional[SessionT]:
Collaborator:
shouldn't need to touch this; this should only be used if cloud_provider == 'aws'

"""Obtain an AWS session using an arbitrary caller-specified role.

:param:`session_type` defines the type of session to return. Most users will use
the default boto3 type. Some users required a special type (e.g aioboto3 session).
"""
# Check if AWS is disabled via config
if infra_config().disable_aws:
logger.warning(f"AWS disabled - skipping role assumption (ignoring: {role})")
return None

# Do not assume roles in CIRCLECI
if os.getenv("CIRCLECI"):
logger.warning(f"In circleci, not assuming role (ignoring: {role})")
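If the Optional return does stay, every call site needs a None guard; a minimal sketch of that pattern (the function name and fallback behavior are illustrative):

    from typing import Optional
    from boto3 import Session

    def s3_client_if_enabled(aws_session: Optional[Session]):
        # session() now returns None when disable_aws is set; callers must branch
        if aws_session is None:
            return None  # caller falls back, e.g. to the Redis backend
        return aws_session.client("s3")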
17 changes: 8 additions & 9 deletions model-engine/model_engine_server/core/aws/secrets.py
@@ -1,6 +1,7 @@
"""AWS secrets module."""

import json
import os
from functools import lru_cache
from typing import Optional

@@ -14,17 +15,15 @@

 @lru_cache(maxsize=2)
 def get_key_file(secret_name: str, aws_profile: Optional[str] = None):
+    # Check if AWS Secrets Manager is disabled via config
+    if infra_config().disable_aws_secrets_manager:
+        logger.warning(f"AWS Secrets Manager disabled - cannot retrieve secret: {secret_name}")
+        return {}
+
     if aws_profile is not None:
         session = boto3.Session(profile_name=aws_profile)
         secret_manager = session.client("secretsmanager", region_name=infra_config().default_region)
     else:
         secret_manager = boto3.client("secretsmanager", region_name=infra_config().default_region)
-    try:
-        secret_value = json.loads(
-            secret_manager.get_secret_value(SecretId=secret_name)["SecretString"]
-        )
-        return secret_value
-    except ClientError as e:
-        logger.error(e)
-        logger.error(f"Failed to retrieve secret: {secret_name}")
-        return {}
+    response = secret_manager.get_secret_value(SecretId=secret_name)
Collaborator:
I think we still want to do the try_catch wrapping to handle the cases where the secret_manager client errors out

+    return json.loads(response["SecretString"])
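A sketch of re-adding that wrapping, mirroring the deleted block above (same names and the same ClientError import the original used):

    # Inside get_key_file, wrapping the call as before:
    try:
        response = secret_manager.get_secret_value(SecretId=secret_name)
        return json.loads(response["SecretString"])
    except ClientError as e:
        logger.error(e)
        logger.error(f"Failed to retrieve secret: {secret_name}")
        return {}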
33 changes: 22 additions & 11 deletions model-engine/model_engine_server/core/celery/app.py
@@ -530,18 +530,29 @@ def _get_backend_url_and_conf(
         # use db_num=1 for backend to differentiate from broker
         backend_url = get_redis_endpoint(1)
     elif backend_protocol == "s3":
-        backend_url = "s3://"
-        if aws_role is None:
-            aws_session = session(infra_config().profile_ml_worker)
+        # Check if AWS is disabled via config - if so, fall back to Redis backend
+        if infra_config().disable_aws:
Collaborator:
instead of doing this, wondering if we address this upstream and figure out how to pass in "redis" as the backend_protocol in the on-prem scenario

logger.warning("AWS disabled - falling back to Redis backend instead of S3")
backend_url = get_redis_endpoint(1)
else:
aws_session = session(aws_role)
out_conf_changes.update(
{
"s3_boto3_session": aws_session,
"s3_bucket": s3_bucket,
"s3_base_path": s3_base_path,
}
)
backend_url = "s3://"
if aws_role is None:
aws_session = session(infra_config().profile_ml_worker)
else:
aws_session = session(aws_role)

# If AWS is disabled, session will be None - fall back to Redis
if aws_session is None:
logger.warning("AWS session is None - falling back to Redis backend")
backend_url = get_redis_endpoint(1)
else:
out_conf_changes.update(
{
"s3_boto3_session": aws_session,
"s3_bucket": s3_bucket,
"s3_base_path": s3_base_path,
}
)
elif backend_protocol == "abs":
backend_url = f"azureblockblob://{os.getenv('ABS_ACCOUNT_NAME')}"
else:
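On the reviewer's upstream suggestion, a minimal sketch of deriving the backend protocol from the provider instead of special-casing disable_aws here (the helper name and the "onprem" string are assumptions; the protocol strings match the branches above):

    def default_backend_protocol(cloud_provider: str) -> str:
        # Hypothetical upstream selection: S3 on AWS, Azure Blob on Azure,
        # Redis for onprem and anything else
        if cloud_provider == "aws":
            return "s3"
        if cloud_provider == "azure":
            return "abs"
        return "redis"

    assert default_backend_protocol("onprem") == "redis"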
11 changes: 11 additions & 0 deletions model-engine/model_engine_server/core/config.py
@@ -49,6 +49,17 @@ class _InfraConfig:
     firehose_role_arn: Optional[str] = None
     firehose_stream_name: Optional[str] = None
     prometheus_server_address: Optional[str] = None
+    # On-premises configuration
+    onprem_redis_host: Optional[str] = None
Collaborator:
i'm wondering if there's an easy way to merge onprem_redis_host w/ the other redis_host arg that already exists.

Author:
We could consolidate this by using the existing redis_host field and adding logic to detect on-premises vs cloud environments. However, we kept them separate because:
- redis_host is used for the message broker (Celery)
- onprem_redis_host is used for the cache/result storage
They might point to different Redis instances in some deployments. Let me know if you would like me to combine them.

Collaborator:
> redis_host is used for the message broker (Celery)
> onprem_redis_host is used for the cache/result storage

This is a good distinction. I think it's better to make that more explicit in the naming as opposed to marking one as "onprem"

+    onprem_redis_port: Optional[str] = "6379"
+    onprem_redis_password: Optional[str] = None
+    # AWS disable configuration
+    disable_aws: bool = False
Collaborator:
could we instead add a new cloud_provider == onprem and change logic based off that?

+    disable_aws_secrets_manager: bool = False
+    # Celery broker configuration
+    disable_sqs_broker: bool = False
+    disable_servicebus_broker: bool = False
+    force_celery_redis: bool = False
 
 
 @dataclass
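A minimal sketch of the reviewer's alternative, deriving the AWS toggles from cloud_provider rather than separate disable_* flags (the class and property names are illustrative, not the repo's actual config type):

    from dataclasses import dataclass

    @dataclass
    class InfraConfigSketch:
        cloud_provider: str = "aws"  # e.g. "aws" | "azure" | "onprem"

        @property
        def aws_enabled(self) -> bool:
            # Everything gated today by disable_aws would check this instead
            return self.cloud_provider == "aws"

    assert InfraConfigSketch("aws").aws_enabled
    assert not InfraConfigSketch("onprem").aws_enabled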
11 changes: 11 additions & 0 deletions model-engine/model_engine_server/core/configs/default.yaml
@@ -14,3 +14,14 @@ db_engine_max_overflow: 10
 db_engine_echo: false
 db_engine_echo_pool: false
 db_engine_disconnect_strategy: "pessimistic"
+# On-premises configuration
+onprem_redis_host: null
+onprem_redis_port: "6379"
+onprem_redis_password: null
+# AWS disable configuration
+disable_aws: false
+disable_aws_secrets_manager: false
+# Celery broker configuration
+disable_sqs_broker: false
+disable_servicebus_broker: false
+force_celery_redis: false
21 changes: 18 additions & 3 deletions model-engine/model_engine_server/core/loggers.py
@@ -80,9 +80,24 @@ def make_standard_logger(name: str, log_level: int = logging.INFO) -> logging.Lo
         raise ValueError("Name must be a non-empty string.")
     logger = logging.getLogger(name)
     logger.setLevel(log_level)
-    logging.basicConfig(
-        format=LOG_FORMAT,
-    )
+
+    # Thread-safe logging configuration - only configure if not already configured
Collaborator:
interesting, was this manifesting in a particular error?

Author:
Yes, there was a specific recursive logging error causing worker crashes:

RuntimeError: reentrant call inside <_io.BufferedWriter name=''>

This occurred during Gunicorn worker startup when multiple processes tried to initialize logging simultaneously, causing thread-unsafe logging configuration and race conditions. The error led to worker crashes, which then triggered the WORKER TIMEOUT errors we were seeing.

The issue was that multiple Gunicorn workers starting at the same time would compete to write to stderr during logging setup, causing a reentrant call error that crashed the worker processes.

+    if not logger.handlers:
+        # Use a lock to prevent race conditions in multi-threaded environments
+        import threading
+        with threading.Lock():
+            if not logger.handlers:  # Double-check after acquiring lock
+                # Configure basic logging only if not already configured
+                if not logging.getLogger().handlers:
+                    logging.basicConfig(
+                        format=LOG_FORMAT,
+                    )
+                # Add handler to this specific logger if needed
+                if not logger.handlers:
+                    handler = logging.StreamHandler()
+                    handler.setFormatter(logging.Formatter(LOG_FORMAT))
+                    logger.addHandler(handler)
 
     return logger


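One caveat worth noting: threading.Lock() is created fresh on every call above, so each caller acquires its own private lock and nothing is actually serialized; the lock has to be shared. A corrected sketch of the double-checked pattern with a module-level lock (names are illustrative):

    import logging
    import threading

    _SETUP_LOCK = threading.Lock()  # module-level, shared by all callers

    def configure_logger(name: str, fmt: str = "%(levelname)s %(message)s") -> logging.Logger:
        logger = logging.getLogger(name)
        if not logger.handlers:              # cheap unsynchronized check
            with _SETUP_LOCK:                # serialize configuration across threads
                if not logger.handlers:      # re-check under the lock
                    handler = logging.StreamHandler()
                    handler.setFormatter(logging.Formatter(fmt))
                    logger.addHandler(handler)
        return logger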
@@ -5,6 +5,7 @@
"""

import argparse
import os
import subprocess
from typing import List

@@ -14,18 +15,25 @@ def start_gunicorn_server(port: int, num_workers: int, debug: bool) -> None:
     additional_args: List[str] = []
     if debug:
         additional_args.extend(["--reload", "--timeout", "0"])
+
+    # Use environment variables for configuration with fallbacks
Collaborator:
👍

+    timeout = int(os.environ.get('WORKER_TIMEOUT', os.environ.get('GUNICORN_TIMEOUT', 60)))
+    graceful_timeout = int(os.environ.get('GUNICORN_GRACEFUL_TIMEOUT', timeout))
+    keep_alive = int(os.environ.get('GUNICORN_KEEP_ALIVE', 2))
+    worker_class = os.environ.get('GUNICORN_WORKER_CLASS', 'model_engine_server.api.worker.LaunchWorker')
+
     command = [
         "gunicorn",
         "--bind",
         f"[::]:{port}",
         "--timeout",
-        "60",
+        str(timeout),
         "--graceful-timeout",
-        "60",
+        str(graceful_timeout),
         "--keep-alive",
-        "2",
+        str(keep_alive),
         "--worker-class",
-        "model_engine_server.api.worker.LaunchWorker",
+        worker_class,
         "--workers",
         f"{num_workers}",
         *additional_args,
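A quick check of how those fallbacks resolve, using the same precedence as the code above (WORKER_TIMEOUT over GUNICORN_TIMEOUT over the default of 60):

    import os

    os.environ.pop("WORKER_TIMEOUT", None)
    os.environ["GUNICORN_TIMEOUT"] = "90"
    assert int(os.environ.get("WORKER_TIMEOUT", os.environ.get("GUNICORN_TIMEOUT", 60))) == 90

    os.environ["WORKER_TIMEOUT"] = "120"  # takes precedence once set
    assert int(os.environ.get("WORKER_TIMEOUT", os.environ.get("GUNICORN_TIMEOUT", 60))) == 120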