on prem changes to disable cloud solutions #700
@@ -0,0 +1,132 @@
# values_onprem.yaml - On-premises deployment configuration
# This file contains default settings for on-premises deployments without cloud dependencies

# tag [required] is the LLM Engine docker image tag
tag: latest
context: onprem

image:
  gatewayRepository: your-registry/model-engine
  builderRepository: your-registry/model-engine
  cacherRepository: your-registry/model-engine
  forwarderRepository: your-registry/model-engine
  pullPolicy: Always

secrets:
  kubernetesDatabaseSecretName: llm-engine-postgres-credentials

db:
  runDbInitScript: false
  runDbMigrationScript: false

serviceAccount:
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade
    "helm.sh/hook-weight": "-2"
  namespaces: []

imageBuilderServiceAccount:
  create: true
  annotations: {}
  namespaces: []

service:
  type: ClusterIP
  port: 80

virtualservice:
  enabled: true
  annotations: {}
  hostDomains:
    - llm-engine.local
  gateways:
    - default/internal-gateway

hostDomain:
  prefix: http://

destinationrule:
  enabled: true
  annotations: {}

replicaCount:
  gateway: 2
  cacher: 1
  builder: 1

balloonConfig:
  reserveHighPriority: true

balloons:
  - acceleratorName: nvidia-ampere-a10
    replicaCount: 0
  - acceleratorName: nvidia-ampere-a100
    replicaCount: 0
  - acceleratorName: cpu
    replicaCount: 0
  - acceleratorName: nvidia-tesla-t4
    replicaCount: 0
  - acceleratorName: nvidia-hopper-h100
    replicaCount: 0
    gpuCount: 4

# On-premises specific configuration
config:
  values:
    infra:
      cloud_provider: onprem
      k8s_cluster_name: onprem_cluster
      dns_host_domain: localhost
      default_region: local
      ml_account_id: "000000000000"
      docker_repo_prefix: "your-registry"
      redis_host: "redis-service"
      s3_bucket: "local-storage"
      profile_ml_worker: "default"
      profile_ml_inference_worker: "default"
      # On-premises configuration
      onprem_redis_host: "redis-service"
      onprem_redis_port: "6379"
      onprem_redis_password: null
      # AWS disable configuration
      disable_aws: true
      disable_aws_secrets_manager: true
      # Celery broker configuration
      disable_sqs_broker: true
      disable_servicebus_broker: true
      force_celery_redis: true
      # DB engine configs
      db_engine_pool_size: 10
      db_engine_max_overflow: 10
      db_engine_echo: false
      db_engine_echo_pool: false
      db_engine_disconnect_strategy: "pessimistic"

# Redis configuration for on-premises
redis:
  auth: null
  enableTLS: false
  enableAuth: false
  kedaSecretName: ""
  unsafeSsl: false

# Gunicorn configuration for on-premises
gunicorn:
  workerTimeout: 120
  gracefulTimeout: 120
  keepAlive: 2
  workerClass: "model_engine_server.api.worker.LaunchWorker"

# Disable cloud-specific features
dd_trace_enabled: false
spellbook:
  enabled: false

keda:
  cooldownPeriod: 300

balloonNodeSelector:
  node-lifecycle: normal

nodeSelector:
  node-lifecycle: normal
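As a rough sketch of how the new keys are meant to be consumed at runtime — assuming the chart renders the `config.values.infra` block above into the config file that `infra_config()` parses (that wiring is not shown in this PR), and using only fields this PR adds to `_InfraConfig`:

```python
from model_engine_server.core.config import infra_config

cfg = infra_config()
if cfg.disable_aws and cfg.force_celery_redis:
    # Assumed on-prem path: queueing and result storage stay on the in-cluster Redis
    redis_url = f"redis://{cfg.onprem_redis_host}:{cfg.onprem_redis_port}/0"
```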
@@ -12,6 +12,7 @@
 from boto3 import Session, client
 from botocore.client import BaseClient
 from model_engine_server.core.loggers import logger_name, make_logger
+from model_engine_server.core.config import infra_config

 logger = make_logger(logger_name())


@@ -114,12 +115,17 @@ def assume_role(role_arn: str, role_session_name: Optional[str] = None) -> AwsCr
     )


-def session(role: Optional[str], session_type: SessionT = Session) -> SessionT:
+def session(role: Optional[str], session_type: SessionT = Session) -> Optional[SessionT]:
     """Obtain an AWS session using an arbitrary caller-specified role.

     :param:`session_type` defines the type of session to return. Most users will use
     the default boto3 type. Some users required a special type (e.g aioboto3 session).
     """
+    # Check if AWS is disabled via config
+    if infra_config().disable_aws:
+        logger.warning(f"AWS disabled - skipping role assumption (ignoring: {role})")
+        return None
+
     # Do not assume roles in CIRCLECI
     if os.getenv("CIRCLECI"):
         logger.warning(f"In circleci, not assuming role (ignoring: {role})")
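Since `session()` can now return `None`, call sites have to guard before creating boto3 clients. A minimal sketch of the expected calling pattern (the module path and the S3 client creation are illustrative assumptions, not part of this diff):

```python
from model_engine_server.core.aws.roles import session  # assumed module path for the helper above
from model_engine_server.core.config import infra_config

aws_session = session(infra_config().profile_ml_worker)
if aws_session is None:
    # disable_aws is set: no AWS session is available, so skip client creation
    s3_client = None
else:
    s3_client = aws_session.client("s3")
```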
@@ -1,6 +1,7 @@
 """AWS secrets module."""

 import json
+import os
 from functools import lru_cache
 from typing import Optional


@@ -14,17 +15,15 @@

 @lru_cache(maxsize=2)
 def get_key_file(secret_name: str, aws_profile: Optional[str] = None):
+    # Check if AWS Secrets Manager is disabled via config
+    if infra_config().disable_aws_secrets_manager:
+        logger.warning(f"AWS Secrets Manager disabled - cannot retrieve secret: {secret_name}")
+        return {}
+
     if aws_profile is not None:
         session = boto3.Session(profile_name=aws_profile)
         secret_manager = session.client("secretsmanager", region_name=infra_config().default_region)
     else:
         secret_manager = boto3.client("secretsmanager", region_name=infra_config().default_region)
-    try:
-        secret_value = json.loads(
-            secret_manager.get_secret_value(SecretId=secret_name)["SecretString"]
-        )
-        return secret_value
-    except ClientError as e:
-        logger.error(e)
-        logger.error(f"Failed to retrieve secret: {secret_name}")
-        return {}
+    response = secret_manager.get_secret_value(SecretId=secret_name)
+
+    return json.loads(response["SecretString"])
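With `disable_aws_secrets_manager` set, `get_key_file` returns an empty dict instead of touching AWS, so callers need their own fallback. An illustrative usage sketch (the module path, secret name, secret schema, and environment-variable fallback are all assumptions, not from this PR):

```python
import os

from model_engine_server.core.aws.secrets import get_key_file  # assumed module path for the function above

creds = get_key_file("my-db-credentials")  # hypothetical secret name
if not creds:
    # Secrets Manager is disabled (or returned nothing): fall back to locally injected credentials
    db_password = os.environ.get("POSTGRES_PASSWORD", "")
else:
    db_password = creds.get("password", "")  # assumed key; the secret schema is not shown here
```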
@@ -530,18 +530,29 @@ def _get_backend_url_and_conf(
         # use db_num=1 for backend to differentiate from broker
         backend_url = get_redis_endpoint(1)
     elif backend_protocol == "s3":
-        backend_url = "s3://"
-        if aws_role is None:
-            aws_session = session(infra_config().profile_ml_worker)
+        # Check if AWS is disabled via config - if so, fall back to Redis backend
+        if infra_config().disable_aws:
+            logger.warning("AWS disabled - falling back to Redis backend instead of S3")
+            backend_url = get_redis_endpoint(1)
         else:
-            aws_session = session(aws_role)
-        out_conf_changes.update(
-            {
-                "s3_boto3_session": aws_session,
-                "s3_bucket": s3_bucket,
-                "s3_base_path": s3_base_path,
-            }
-        )
+            backend_url = "s3://"
+            if aws_role is None:
+                aws_session = session(infra_config().profile_ml_worker)
+            else:
+                aws_session = session(aws_role)
+
+            # If AWS is disabled, session will be None - fall back to Redis
+            if aws_session is None:
+                logger.warning("AWS session is None - falling back to Redis backend")
+                backend_url = get_redis_endpoint(1)
+            else:
+                out_conf_changes.update(
+                    {
+                        "s3_boto3_session": aws_session,
+                        "s3_bucket": s3_bucket,
+                        "s3_base_path": s3_base_path,
+                    }
+                )
     elif backend_protocol == "abs":
         backend_url = f"azureblockblob://{os.getenv('ABS_ACCOUNT_NAME')}"
     else:
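Stripped of the surrounding plumbing, the result-backend selection after this change behaves roughly like the sketch below (a standalone simplification for illustration; the real code uses `get_redis_endpoint` and `infra_config()` as shown above):

```python
def pick_result_backend(backend_protocol: str, disable_aws: bool, aws_session) -> str:
    """Simplified decision table for the Celery result backend after this PR."""
    if backend_protocol == "redis":
        return "redis, db 1"        # unchanged behaviour
    if backend_protocol == "s3":
        if disable_aws or aws_session is None:
            return "redis, db 1"    # new on-prem fallback path
        return "s3"                 # original cloud path, with the boto3 session attached
    return "azure blob"             # abs backend is untouched


assert pick_result_backend("s3", disable_aws=True, aws_session=None) == "redis, db 1"
```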
@@ -49,6 +49,17 @@ class _InfraConfig:
     firehose_role_arn: Optional[str] = None
     firehose_stream_name: Optional[str] = None
     prometheus_server_address: Optional[str] = None
+    # On-premises configuration
+    onprem_redis_host: Optional[str] = None
+    onprem_redis_port: Optional[str] = "6379"
+    onprem_redis_password: Optional[str] = None
+    # AWS disable configuration
+    disable_aws: bool = False
+    disable_aws_secrets_manager: bool = False
+    # Celery broker configuration
+    disable_sqs_broker: bool = False
+    disable_servicebus_broker: bool = False
+    force_celery_redis: bool = False


 @dataclass
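Every new field has a default, so existing cloud deployments that never set these keys keep their current behaviour; only a values file like `values_onprem.yaml` above opts in. A quick standalone check of that (re-declaring just the new fields, not the real class):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class OnPremFields:
    # mirrors the defaults added to _InfraConfig in this PR
    onprem_redis_host: Optional[str] = None
    onprem_redis_port: Optional[str] = "6379"
    onprem_redis_password: Optional[str] = None
    disable_aws: bool = False
    disable_aws_secrets_manager: bool = False
    disable_sqs_broker: bool = False
    disable_servicebus_broker: bool = False
    force_celery_redis: bool = False


assert OnPremFields().disable_aws is False  # cloud installs are unaffected unless they opt in
```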
@@ -80,9 +80,24 @@ def make_standard_logger(name: str, log_level: int = logging.INFO) -> logging.Lo
         raise ValueError("Name must be a non-empty string.")
     logger = logging.getLogger(name)
     logger.setLevel(log_level)
-    logging.basicConfig(
-        format=LOG_FORMAT,
-    )
+
+    # Thread-safe logging configuration - only configure if not already configured
+    if not logger.handlers:
+        # Use a lock to prevent race conditions in multi-threaded environments
+        import threading
+        with threading.Lock():
+            if not logger.handlers:  # Double-check after acquiring lock
+                # Configure basic logging only if not already configured
+                if not logging.getLogger().handlers:
+                    logging.basicConfig(
+                        format=LOG_FORMAT,
+                    )
+                # Add handler to this specific logger if needed
+                if not logger.handlers:
+                    handler = logging.StreamHandler()
+                    handler.setFormatter(logging.Formatter(LOG_FORMAT))
+                    logger.addHandler(handler)
+
     return logger

Review comment: interesting, was this manifesting in a particular error?

Reply: Yes, there was a specific recursive logging error causing worker crashes:

RuntimeError: reentrant call inside <_io.BufferedWriter name=''>

This occurred during Gunicorn worker startup when multiple processes tried to initialize logging simultaneously, causing thread-unsafe logging configuration and race conditions. The error led to worker crashes, which then triggered the WORKER TIMEOUT errors we were seeing. The issue was that multiple Gunicorn workers starting at the same time would compete to write to stderr during logging setup, causing a reentrant call error that crashed the worker processes.
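The thread above attributes the crashes to concurrent first-time logging setup. For reference, here is a standalone sketch of the configure-once pattern using a lock created at module scope (illustrative only, not the code in this diff; it guards duplicate handler setup within a single process):

```python
import logging
import threading

LOG_FORMAT = "%(asctime)s %(levelname)s %(name)s: %(message)s"  # placeholder format
_config_lock = threading.Lock()  # created once per process and shared by all callers


def get_logger(name: str, level: int = logging.INFO) -> logging.Logger:
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if not logger.handlers:              # cheap check without the lock
        with _config_lock:               # serialize concurrent first-time setup
            if not logger.handlers:      # re-check after acquiring the lock
                handler = logging.StreamHandler()
                handler.setFormatter(logging.Formatter(LOG_FORMAT))
                logger.addHandler(handler)
    return logger
```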
@@ -5,6 +5,7 @@
 """

 import argparse
+import os
 import subprocess
 from typing import List


@@ -14,18 +15,25 @@ def start_gunicorn_server(port: int, num_workers: int, debug: bool) -> None:
     additional_args: List[str] = []
     if debug:
         additional_args.extend(["--reload", "--timeout", "0"])

+    # Use environment variables for configuration with fallbacks
+    timeout = int(os.environ.get('WORKER_TIMEOUT', os.environ.get('GUNICORN_TIMEOUT', 60)))
+    graceful_timeout = int(os.environ.get('GUNICORN_GRACEFUL_TIMEOUT', timeout))
+    keep_alive = int(os.environ.get('GUNICORN_KEEP_ALIVE', 2))
+    worker_class = os.environ.get('GUNICORN_WORKER_CLASS', 'model_engine_server.api.worker.LaunchWorker')
+
     command = [
         "gunicorn",
         "--bind",
         f"[::]:{port}",
         "--timeout",
-        "60",
+        str(timeout),
         "--graceful-timeout",
-        "60",
+        str(graceful_timeout),
         "--keep-alive",
-        "2",
+        str(keep_alive),
         "--worker-class",
-        "model_engine_server.api.worker.LaunchWorker",
+        worker_class,
         "--workers",
         f"{num_workers}",
         *additional_args,

Review comment (on the environment-variable fallbacks): 👍
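The precedence of the new environment variables is worth spelling out: `WORKER_TIMEOUT` wins over `GUNICORN_TIMEOUT`, which wins over the hard-coded 60-second default, and the graceful timeout inherits the resolved timeout unless set explicitly. The `gunicorn:` block in `values_onprem.yaml` above presumably feeds these variables, though that wiring is not part of this diff. A tiny self-contained check (the values are arbitrary examples):

```python
import os

os.environ["GUNICORN_TIMEOUT"] = "90"
os.environ["WORKER_TIMEOUT"] = "120"

# same lookups as in start_gunicorn_server above
timeout = int(os.environ.get("WORKER_TIMEOUT", os.environ.get("GUNICORN_TIMEOUT", 60)))
graceful_timeout = int(os.environ.get("GUNICORN_GRACEFUL_TIMEOUT", timeout))

assert timeout == 120
assert graceful_timeout == 120  # inherits the resolved timeout when not set explicitly
```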
Review comment: Anecdotally, we found it a lot easier to performance-tune pure uvicorn, so we actually migrated most usage of gunicorn back to uvicorn. That being said, won't block your usage of it.