Skip to content

Commit 3111f9b

Browse files
bpkrothCopilotpre-commit-ci[bot]motus
authored
Attempt to sanitize some configurations before we output them (#988)
# Pull Request ## Title Attempt to sanitize some configurations before we output them ______________________________________________________________________ ## Description Address some code scanning complaints and try to sanitize some configs before outputting them. > Note: this is not meant as a complete fix for those warnings, but only a start. We can fix more locations using `sanitize_config` in the future prior to printing. ______________________________________________________________________ ## Type of Change - 🛠️ Bug fix ______________________________________________________________________ ## Testing - Security scans and other usual CI ______________________________________________________________________ ## Additional Notes Also includes link fixes from #989 ______________________________________________________________________ --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sergiy Matusevych <sergiym@microsoft.com>
1 parent 44985eb commit 3111f9b

File tree

3 files changed

+107
-2
lines changed

3 files changed

+107
-2
lines changed

mlos_bench/mlos_bench/services/config_persistence.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
merge_parameters,
3434
path_join,
3535
preprocess_dynamic_configs,
36+
sanitize_config,
3637
)
3738

3839
if TYPE_CHECKING:
@@ -277,7 +278,7 @@ def prepare_class_load(
277278
_LOG.debug(
278279
"Instantiating: %s with config:\n%s",
279280
class_name,
280-
json5.dumps(class_config, indent=2),
281+
json5.dumps(sanitize_config(class_config), indent=2),
281282
)
282283

283284
return (class_name, class_config)
@@ -574,7 +575,13 @@ def build_service(
574575
services from the list plus the parent mix-in.
575576
"""
576577
if _LOG.isEnabledFor(logging.DEBUG):
577-
_LOG.debug("Build service from config:\n%s", json5.dumps(config, indent=2))
578+
_LOG.debug(
579+
"Build service from config:\n%s",
580+
json5.dumps(
581+
sanitize_config(config),
582+
indent=2,
583+
),
584+
)
578585

579586
assert isinstance(config, dict)
580587
config_list: list[dict[str, Any]]
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#
2+
# Copyright (c) Microsoft Corporation.
3+
# Licensed under the MIT License.
4+
#
5+
"""
6+
Unit tests for the sanitize_config utility function.
7+
8+
Tests cover obfuscation of sensitive keys and recursive sanitization.
9+
"""
10+
from mlos_bench.util import sanitize_config
11+
12+
13+
def test_sanitize_config_simple() -> None:
    """Check that sensitive top-level keys are redacted and other keys are kept."""
    sanitized = sanitize_config(
        {
            "username": "user1",
            "password": "mypassword",
            "token": "abc123",
            "api_key": "key",
            "secret": "shh",
            "other": 42,
        }
    )
    # Every sensitive key must be replaced by the redaction marker.
    for sensitive_key in ("password", "token", "api_key", "secret"):
        assert sanitized[sensitive_key] == "[REDACTED]"
    # Non-sensitive entries pass through untouched.
    assert sanitized["username"] == "user1"
    assert sanitized["other"] == 42
30+
31+
32+
def test_sanitize_config_nested() -> None:
    """Check that sensitive keys are redacted at every level of nested dicts."""
    inner_cfg = {"token": "tok", "foo": "bar"}
    outer_cfg = {"password": "pw", "inner": inner_cfg}
    sanitized = sanitize_config({"outer": outer_cfg, "api_key": "key"})

    sanitized_outer = sanitized["outer"]
    sanitized_inner = sanitized_outer["inner"]
    assert sanitized_outer["password"] == "[REDACTED]"
    assert sanitized_inner["token"] == "[REDACTED]"
    # A non-sensitive key in the innermost dict is preserved.
    assert sanitized_inner["foo"] == "bar"
    assert sanitized["api_key"] == "[REDACTED]"
46+
47+
48+
def test_sanitize_config_no_sensitive_keys() -> None:
    """Check that a config without sensitive keys compares equal after sanitizing."""
    original = {"foo": 1, "bar": {"baz": 2}}
    assert sanitize_config(original) == original
53+
54+
55+
def test_sanitize_config_mixed_types() -> None:
    """Check that sensitive keys are redacted whatever the type of their value."""
    sanitized = sanitize_config(
        {
            "password": None,
            "token": 123,
            "secret": ["a", "b"],
            "api_key": {"nested": "val"},
        }
    )
    # None, int, list and dict values are all replaced by the same marker.
    for sensitive_key in ("password", "token", "secret", "api_key"):
        assert sanitized[sensitive_key] == "[REDACTED]"

mlos_bench/mlos_bench/util.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,3 +462,34 @@ def datetime_parser(
462462
if new_datetime_col.le(_MIN_TS).any():
463463
raise ValueError(f"Invalid date range in the data: {datetime_col}")
464464
return new_datetime_col
465+
466+
467+
def sanitize_config(config: dict[str, Any]) -> dict[str, Any]:
    """
    Sanitize a configuration dictionary by obfuscating potentially sensitive keys.

    The value stored under any key named ``password``, ``secret``, ``token`` or
    ``api_key`` (compared case-insensitively) is replaced with ``"[REDACTED]"``,
    whatever its type. Nested dictionaries are sanitized recursively, including
    dictionaries held inside lists or tuples. The input is not modified; a new
    structure is returned.

    Parameters
    ----------
    config : dict
        Configuration dictionary to sanitize.

    Returns
    -------
    dict
        Sanitized copy of the configuration dictionary.
    """
    sanitize_keys = {"password", "secret", "token", "api_key"}

    def recursive_sanitize(conf: Any) -> Any:
        """Recursively sanitize dicts, lists and tuples; return other values as-is."""
        if isinstance(conf, dict):
            return {
                k: (
                    "[REDACTED]"
                    # Only string keys can name a secret; compare case-insensitively
                    # so that e.g. "Password" or "API_KEY" are redacted as well.
                    if isinstance(k, str) and k.lower() in sanitize_keys
                    else recursive_sanitize(v)
                )
                for k, v in conf.items()
            }
        # Sequences may hold nested configs (e.g., a list of service configs),
        # so descend into them instead of copying them verbatim.
        if isinstance(conf, list):
            return [recursive_sanitize(item) for item in conf]
        if isinstance(conf, tuple):
            return tuple(recursive_sanitize(item) for item in conf)
        return conf

    return recursive_sanitize(config)

0 commit comments

Comments
 (0)