-
Notifications
You must be signed in to change notification settings - Fork 163
fix: Redact sensitive data from OTEL traces and fix env var parsing #1553
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ | |
import os | ||
|
||
from contextlib import contextmanager | ||
|
||
from urllib.parse import urlparse | ||
from google.api_core import exceptions as api_exceptions | ||
from google.api_core import retry as api_retry | ||
from google.cloud.storage import __version__ | ||
|
@@ -28,7 +28,15 @@ | |
ENABLE_OTEL_TRACES_ENV_VAR = "ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES" | ||
_DEFAULT_ENABLE_OTEL_TRACES_VALUE = False | ||
|
||
enable_otel_traces = os.environ.get( | ||
|
||
def _parse_bool_env(name: str, default: bool = False) -> bool: | ||
val = os.environ.get(name, None) | ||
if val is None: | ||
return default | ||
return str(val).strip().lower() in {"1", "true", "yes", "on"} | ||
|
||
|
||
enable_otel_traces = _parse_bool_env( | ||
ENABLE_OTEL_TRACES_ENV_VAR, _DEFAULT_ENABLE_OTEL_TRACES_VALUE | ||
) | ||
logger = logging.getLogger(__name__) | ||
|
@@ -105,15 +113,37 @@ def _set_api_request_attr(request, client): | |
if request.get("method"): | ||
attr["http.request.method"] = request.get("method") | ||
if request.get("path"): | ||
path = request.get("path") | ||
full_path = f"{client._connection.API_BASE_URL}{path}" | ||
attr["url.full"] = full_path | ||
if request.get("timeout"): | ||
attr["connect_timeout,read_timeout"] = request.get("timeout") | ||
full_url = client._connection.build_api_url(request.get("path")) | ||
attr.update(_get_opentelemetry_attributes_from_url(full_url, strip_query=True)) | ||
if "timeout" in request: | ||
attr["connect_timeout,read_timeout"] = str(request.get("timeout")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there's no need to change timeout section of the code and convert into str() |
||
return attr | ||
|
||
|
||
def _set_retry_attr(retry, conditional_predicate=None): | ||
predicate = conditional_predicate if conditional_predicate else retry._predicate | ||
retry_info = f"multiplier{retry._multiplier}/deadline{retry._deadline}/max{retry._maximum}/initial{retry._initial}/predicate{predicate}" | ||
return {"retry": retry_info} | ||
|
||
|
||
def _get_opentelemetry_attributes_from_url(url, strip_query=True): | ||
"""Helper to assemble OpenTelemetry span attributes from a URL.""" | ||
u = urlparse(url) | ||
netloc = u.netloc | ||
# u.hostname is always lowercase. We parse netloc to preserve casing. | ||
# netloc format: [userinfo@]host[:port] | ||
if "@" in netloc: | ||
netloc = netloc.split("@", 1)[1] | ||
if ":" in netloc and not netloc.endswith("]"): # Handle IPv6 literal | ||
netloc = netloc.split(":", 1)[0] | ||
|
||
attributes = { | ||
"server.address": netloc, | ||
"server.port": u.port, | ||
"url.scheme": u.scheme, | ||
"url.path": u.path, | ||
} | ||
if not strip_query: | ||
attributes["url.query"] = u.query | ||
|
||
return attributes |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to add one more unit test . It should test that tracing are disabled when a user sets ie negative unit test of |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,7 +136,7 @@ def test_get_final_attributes(setup, setup_optin): | |
} | ||
api_request = { | ||
"method": "GET", | ||
"path": "/foo/bar/baz", | ||
"path": "/foo/bar/baz?sensitive=true", | ||
"timeout": (100, 100), | ||
} | ||
retry_obj = api_retry.Retry() | ||
|
@@ -147,15 +147,19 @@ def test_get_final_attributes(setup, setup_optin): | |
"rpc.system": "http", | ||
"user_agent.original": f"gcloud-python/{__version__}", | ||
"http.request.method": "GET", | ||
"url.full": "https://testOtel.org/foo/bar/baz", | ||
"connect_timeout,read_timeout": (100, 100), | ||
"server.address": "testOtel.org", | ||
"url.path": "/foo/bar/baz", | ||
"url.scheme": "https", | ||
"connect_timeout,read_timeout": str((100, 100)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we don't change |
||
"retry": f"multiplier{retry_obj._multiplier}/deadline{retry_obj._deadline}/max{retry_obj._maximum}/initial{retry_obj._initial}/predicate{retry_obj._predicate}", | ||
} | ||
expected_attributes.update(_opentelemetry_tracing._cloud_trace_adoption_attrs) | ||
|
||
with mock.patch("google.cloud.storage.client.Client") as test_client: | ||
test_client.project = "test_project" | ||
test_client._connection.API_BASE_URL = "https://testOtel.org" | ||
test_client._connection.build_api_url.return_value = ( | ||
"https://testOtel.org/foo/bar/baz?sensitive=true" | ||
) | ||
with _opentelemetry_tracing.create_trace_span( | ||
test_span_name, | ||
attributes=test_span_attributes, | ||
|
@@ -165,6 +169,7 @@ def test_get_final_attributes(setup, setup_optin): | |
) as span: | ||
assert span is not None | ||
assert span.name == test_span_name | ||
assert "url.query" not in span.attributes | ||
assert span.attributes == expected_attributes | ||
|
||
|
||
|
@@ -196,23 +201,73 @@ def test_set_conditional_retry_attr(setup, setup_optin): | |
assert span.attributes == expected_attributes | ||
|
||
|
||
def test_set_api_request_attr(): | ||
from google.cloud.storage import Client | ||
def test__get_opentelemetry_attributes_from_url(): | ||
url = "https://example.com:8080/path?query=true" | ||
expected = { | ||
"server.address": "example.com", | ||
"server.port": 8080, | ||
"url.scheme": "https", | ||
"url.path": "/path", | ||
} | ||
# Test stripping query | ||
attrs = _opentelemetry_tracing._get_opentelemetry_attributes_from_url( | ||
url, strip_query=True | ||
) | ||
assert attrs == expected | ||
assert "url.query" not in attrs | ||
|
||
test_client = Client() | ||
args_method = {"method": "GET"} | ||
expected_attributes = {"http.request.method": "GET"} | ||
attr = _opentelemetry_tracing._set_api_request_attr(args_method, test_client) | ||
assert attr == expected_attributes | ||
# Test not stripping query | ||
expected["url.query"] = "query=true" | ||
attrs = _opentelemetry_tracing._get_opentelemetry_attributes_from_url( | ||
url, strip_query=False | ||
) | ||
assert attrs == expected | ||
|
||
args_path = {"path": "/foo/bar/baz"} | ||
expected_attributes = {"url.full": "https://storage.googleapis.com/foo/bar/baz"} | ||
attr = _opentelemetry_tracing._set_api_request_attr(args_path, test_client) | ||
assert attr == expected_attributes | ||
|
||
args_timeout = {"timeout": (100, 100)} | ||
def test__get_opentelemetry_attributes_from_url_with_query(): | ||
url = "https://example.com/path?query=true&another=false" | ||
expected = { | ||
"server.address": "example.com", | ||
"server.port": None, | ||
"url.scheme": "https", | ||
"url.path": "/path", | ||
"url.query": "query=true&another=false", | ||
} | ||
# Test not stripping query | ||
attrs = _opentelemetry_tracing._get_opentelemetry_attributes_from_url( | ||
url, strip_query=False | ||
) | ||
assert attrs == expected | ||
|
||
|
||
def test_set_api_request_attr_with_pii_in_query(): | ||
client = mock.Mock() | ||
client._connection.build_api_url.return_value = ( | ||
"https://example.com/path?sensitive=true&token=secret" | ||
) | ||
|
||
request = { | ||
"method": "GET", | ||
"path": "/path?sensitive=true&token=secret", | ||
"timeout": 60, | ||
} | ||
expected_attributes = { | ||
"connect_timeout,read_timeout": (100, 100), | ||
"http.request.method": "GET", | ||
"server.address": "example.com", | ||
"server.port": None, | ||
"url.scheme": "https", | ||
"url.path": "/path", | ||
"connect_timeout,read_timeout": "60", | ||
} | ||
attr = _opentelemetry_tracing._set_api_request_attr(args_timeout, test_client) | ||
attr = _opentelemetry_tracing._set_api_request_attr(request, client) | ||
assert attr == expected_attributes | ||
assert "url.query" not in attr # Ensure query with PII is not captured | ||
|
||
|
||
def test_set_api_request_attr_no_timeout(): | ||
client = mock.Mock() | ||
client._connection.build_api_url.return_value = "https://example.com/path" | ||
|
||
request = {"method": "GET", "path": "/path"} | ||
attr = _opentelemetry_tracing._set_api_request_attr(request, client) | ||
assert "connect_timeout,read_timeout" not in attr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: coercing to str is not required. Just
val.strip().lower()
should work fine because output fromos.environ.get
is always str or None. If None, it's caught in line35 anddefault
is returned.