Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 37 additions & 7 deletions google/cloud/storage/_opentelemetry_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import os

from contextlib import contextmanager

from urllib.parse import urlparse
from google.api_core import exceptions as api_exceptions
from google.api_core import retry as api_retry
from google.cloud.storage import __version__
Expand All @@ -28,7 +28,15 @@
ENABLE_OTEL_TRACES_ENV_VAR = "ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES"
_DEFAULT_ENABLE_OTEL_TRACES_VALUE = False

enable_otel_traces = os.environ.get(

def _parse_bool_env(name: str, default: bool = False) -> bool:
val = os.environ.get(name, None)
if val is None:
return default
return str(val).strip().lower() in {"1", "true", "yes", "on"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: coercing to str is not required. Just val.strip().lower() should work fine because output from os.environ.get is always str or None. If None, it's caught in line35 and default is returned.



enable_otel_traces = _parse_bool_env(
ENABLE_OTEL_TRACES_ENV_VAR, _DEFAULT_ENABLE_OTEL_TRACES_VALUE
)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -105,15 +113,37 @@ def _set_api_request_attr(request, client):
if request.get("method"):
attr["http.request.method"] = request.get("method")
if request.get("path"):
path = request.get("path")
full_path = f"{client._connection.API_BASE_URL}{path}"
attr["url.full"] = full_path
if request.get("timeout"):
attr["connect_timeout,read_timeout"] = request.get("timeout")
full_url = client._connection.build_api_url(request.get("path"))
attr.update(_get_opentelemetry_attributes_from_url(full_url, strip_query=True))
if "timeout" in request:
attr["connect_timeout,read_timeout"] = str(request.get("timeout"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's no need to change timeout section of the code and convert into str()

return attr


def _set_retry_attr(retry, conditional_predicate=None):
predicate = conditional_predicate if conditional_predicate else retry._predicate
retry_info = f"multiplier{retry._multiplier}/deadline{retry._deadline}/max{retry._maximum}/initial{retry._initial}/predicate{predicate}"
return {"retry": retry_info}


def _get_opentelemetry_attributes_from_url(url, strip_query=True):
"""Helper to assemble OpenTelemetry span attributes from a URL."""
u = urlparse(url)
netloc = u.netloc
# u.hostname is always lowercase. We parse netloc to preserve casing.
# netloc format: [userinfo@]host[:port]
if "@" in netloc:
netloc = netloc.split("@", 1)[1]
if ":" in netloc and not netloc.endswith("]"): # Handle IPv6 literal
netloc = netloc.split(":", 1)[0]

attributes = {
"server.address": netloc,
"server.port": u.port,
"url.scheme": u.scheme,
"url.path": u.path,
}
if not strip_query:
attributes["url.query"] = u.query

return attributes
30 changes: 14 additions & 16 deletions google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
from google.cloud._helpers import _rfc3339_nanos_to_datetime
from google.cloud._helpers import _to_bytes
from google.cloud.exceptions import NotFound
from google.cloud.storage._opentelemetry_tracing import (
_get_opentelemetry_attributes_from_url,
)
from google.cloud.storage._helpers import _add_etag_match_headers
from google.cloud.storage._helpers import _add_generation_match_parameters
from google.cloud.storage._helpers import _PropertyMixin
Expand Down Expand Up @@ -1067,13 +1070,11 @@ def _do_download(
Please enable this as per your use case.
"""

extra_attributes = {
"url.full": download_url,
"download.chunk_size": f"{self.chunk_size}",
"download.raw_download": raw_download,
"upload.checksum": f"{checksum}",
"download.single_shot_download": single_shot_download,
}
extra_attributes = _get_opentelemetry_attributes_from_url(download_url)
extra_attributes["download.chunk_size"] = f"{self.chunk_size}"
extra_attributes["download.raw_download"] = raw_download
extra_attributes["upload.checksum"] = f"{checksum}"
extra_attributes["download.single_shot_download"] = single_shot_download
args = {"timeout": timeout}

if self.chunk_size is None:
Expand Down Expand Up @@ -2063,10 +2064,8 @@ def _do_multipart_upload(
upload_url, headers=headers, checksum=checksum, retry=retry
)

extra_attributes = {
"url.full": upload_url,
"upload.checksum": f"{checksum}",
}
extra_attributes = _get_opentelemetry_attributes_from_url(upload_url)
extra_attributes["upload.checksum"] = f"{checksum}"
args = {"timeout": timeout}
with create_trace_span(
name="Storage.MultipartUpload/transmit",
Expand Down Expand Up @@ -2396,11 +2395,10 @@ def _do_resumable_upload(
retry=retry,
command=command,
)
extra_attributes = {
"url.full": upload.resumable_url,
"upload.chunk_size": upload.chunk_size,
"upload.checksum": f"{checksum}",
}
extra_attributes = _get_opentelemetry_attributes_from_url(upload.resumable_url)
extra_attributes["upload.chunk_size"] = upload.chunk_size
extra_attributes["upload.checksum"] = f"{checksum}"

args = {"timeout": timeout}
with create_trace_span(
name="Storage.ResumableUpload/transmitNextChunk",
Expand Down
91 changes: 73 additions & 18 deletions tests/unit/test__opentelemetry_tracing.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to add one more unit test . It should test that tracing are disabled when a user sets ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES to False (or any value other than {1, true, on, yes} )

ie negative unit test of test_enable_trace_yield_span

Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_get_final_attributes(setup, setup_optin):
}
api_request = {
"method": "GET",
"path": "/foo/bar/baz",
"path": "/foo/bar/baz?sensitive=true",
"timeout": (100, 100),
}
retry_obj = api_retry.Retry()
Expand All @@ -147,15 +147,19 @@ def test_get_final_attributes(setup, setup_optin):
"rpc.system": "http",
"user_agent.original": f"gcloud-python/{__version__}",
"http.request.method": "GET",
"url.full": "https://testOtel.org/foo/bar/baz",
"connect_timeout,read_timeout": (100, 100),
"server.address": "testOtel.org",
"url.path": "/foo/bar/baz",
"url.scheme": "https",
"connect_timeout,read_timeout": str((100, 100)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we don't change timeout param in the production file, we need not change the same here in test file.

"retry": f"multiplier{retry_obj._multiplier}/deadline{retry_obj._deadline}/max{retry_obj._maximum}/initial{retry_obj._initial}/predicate{retry_obj._predicate}",
}
expected_attributes.update(_opentelemetry_tracing._cloud_trace_adoption_attrs)

with mock.patch("google.cloud.storage.client.Client") as test_client:
test_client.project = "test_project"
test_client._connection.API_BASE_URL = "https://testOtel.org"
test_client._connection.build_api_url.return_value = (
"https://testOtel.org/foo/bar/baz?sensitive=true"
)
with _opentelemetry_tracing.create_trace_span(
test_span_name,
attributes=test_span_attributes,
Expand All @@ -165,6 +169,7 @@ def test_get_final_attributes(setup, setup_optin):
) as span:
assert span is not None
assert span.name == test_span_name
assert "url.query" not in span.attributes
assert span.attributes == expected_attributes


Expand Down Expand Up @@ -196,23 +201,73 @@ def test_set_conditional_retry_attr(setup, setup_optin):
assert span.attributes == expected_attributes


def test_set_api_request_attr():
from google.cloud.storage import Client
def test__get_opentelemetry_attributes_from_url():
url = "https://example.com:8080/path?query=true"
expected = {
"server.address": "example.com",
"server.port": 8080,
"url.scheme": "https",
"url.path": "/path",
}
# Test stripping query
attrs = _opentelemetry_tracing._get_opentelemetry_attributes_from_url(
url, strip_query=True
)
assert attrs == expected
assert "url.query" not in attrs

test_client = Client()
args_method = {"method": "GET"}
expected_attributes = {"http.request.method": "GET"}
attr = _opentelemetry_tracing._set_api_request_attr(args_method, test_client)
assert attr == expected_attributes
# Test not stripping query
expected["url.query"] = "query=true"
attrs = _opentelemetry_tracing._get_opentelemetry_attributes_from_url(
url, strip_query=False
)
assert attrs == expected

args_path = {"path": "/foo/bar/baz"}
expected_attributes = {"url.full": "https://storage.googleapis.com/foo/bar/baz"}
attr = _opentelemetry_tracing._set_api_request_attr(args_path, test_client)
assert attr == expected_attributes

args_timeout = {"timeout": (100, 100)}
def test__get_opentelemetry_attributes_from_url_with_query():
url = "https://example.com/path?query=true&another=false"
expected = {
"server.address": "example.com",
"server.port": None,
"url.scheme": "https",
"url.path": "/path",
"url.query": "query=true&another=false",
}
# Test not stripping query
attrs = _opentelemetry_tracing._get_opentelemetry_attributes_from_url(
url, strip_query=False
)
assert attrs == expected


def test_set_api_request_attr_with_pii_in_query():
client = mock.Mock()
client._connection.build_api_url.return_value = (
"https://example.com/path?sensitive=true&token=secret"
)

request = {
"method": "GET",
"path": "/path?sensitive=true&token=secret",
"timeout": 60,
}
expected_attributes = {
"connect_timeout,read_timeout": (100, 100),
"http.request.method": "GET",
"server.address": "example.com",
"server.port": None,
"url.scheme": "https",
"url.path": "/path",
"connect_timeout,read_timeout": "60",
}
attr = _opentelemetry_tracing._set_api_request_attr(args_timeout, test_client)
attr = _opentelemetry_tracing._set_api_request_attr(request, client)
assert attr == expected_attributes
assert "url.query" not in attr # Ensure query with PII is not captured


def test_set_api_request_attr_no_timeout():
client = mock.Mock()
client._connection.build_api_url.return_value = "https://example.com/path"

request = {"method": "GET", "path": "/path"}
attr = _opentelemetry_tracing._set_api_request_attr(request, client)
assert "connect_timeout,read_timeout" not in attr
2 changes: 2 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,7 @@ def test__list_resource_w_defaults(self):
credentials = _make_credentials()
client = self._make_one(project=project, credentials=credentials)
connection = client._base_connection = _make_connection()
connection.build_api_url = mock.Mock(return_value="http://example.com" + path)

iterator = client._list_resource(
path=path,
Expand Down Expand Up @@ -719,6 +720,7 @@ def test__list_resource_w_explicit(self):
credentials = _make_credentials()
client = self._make_one(project=project, credentials=credentials)
connection = client._base_connection = _make_connection()
connection.build_api_url = mock.Mock(return_value="http://example.com" + path)

iterator = client._list_resource(
path=path,
Expand Down