Skip to content

Commit 15df97c

Browse files
authored
Fix token refresh deadlock issue (#134)
We have received reports of infinite looping during the last ~4 minutes of the token TTL when reading Parquet from GCS with Dapla Toolbelt. This PR should resolve the issue by overriding the refresh_handler; this is the intended way to provide credentials with custom logic for fetching tokens, and it does not result in deadlock issues. Previously, we directly overrode the refresh method. However, this approach led to deadlock issues in gcsfs/credentials.py's maybe_refresh method. Other changes: Since we can't force a refresh, the threshold is lowered to the old value of 20s to keep us from waiting ~4 minutes for a new token. Refresh window was modified in: googleapis/google-auth-library-python@c6af1d6 Issue recreation steps for https://jupyter.dapla.ssb.no/ and Dapla Lab VSCode: This code would freeze when the TTL of the token was less than 3m 45s: import time import dapla as dp import pandas as pd import inspect import logging logging.basicConfig() logging.getLogger().setLevel(logging.DEBUG) hvilkensomhelststi = "ssb-prod-dapla-felles-data-delt/GIS/testdata/noen_boligbygg_oslo.parquet" while True: print(dp.read_pandas(hvilkensomhelststi)) print(f"{pd.Timestamp.now().round('s')=}") time.sleep(0.1)
1 parent 41574e5 commit 15df97c

File tree

5 files changed

+79
-27
lines changed

5 files changed

+79
-27
lines changed

noxfile.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,11 @@ def typeguard(session: Session) -> None:
200200
"""Runtime type checking using Typeguard."""
201201
session.install(".")
202202
session.install("pytest", "typeguard", "pygments", "responses")
203-
session.run("pytest", f"--typeguard-packages={package}", *session.posargs)
203+
session.run(
204+
"pytest",
205+
f"--typeguard-packages={package} --ignore=tests/test_auth.py",
206+
*session.posargs,
207+
)
204208

205209

206210
@session(python=python_versions)

poetry.lock

Lines changed: 15 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "dapla-toolbelt"
3-
version = "2.0.11"
3+
version = "2.0.12"
44
description = "Dapla Toolbelt"
55
authors = ["Dapla Developers <dapla-platform-developers@ssb.no>"]
66
license = "MIT"
@@ -54,6 +54,7 @@ types-requests = ">=2.28.11"
5454
pyarrow-stubs = ">=10.0.1.7"
5555
google-auth-stubs = ">=0.2.0" # Not maintained by Google, should change if Google releases their own stubs
5656
pandas-stubs = ">=2.0.0"
57+
pytest-timeout = "^2.3.1"
5758

5859
[tool.pytest.ini_options]
5960
pythonpath = ["src"]

src/dapla/auth.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from collections.abc import Sequence
55
from datetime import datetime
66
from datetime import timedelta
7-
from functools import partial
87
from typing import Any
98
from typing import Optional
109

@@ -16,6 +15,13 @@
1615
from IPython.display import display
1716
from jupyterhub.services.auth import HubAuth
1817

18+
# Refresh window was modified in: https://github.com/googleapis/google-auth-library-python/commit/c6af1d692b43833baca978948376739547cf685a
19+
# The change was directed towards high latency environments, and should not apply to us.
20+
# Since we can't force a refresh, the threshold is lowered to keep us from waiting ~4 minutes for a new token.
21+
# A permanent fix would be to supply credentials with a refresh endpoint
22+
# that always returns a token that is valid for more than 3m 45s.
23+
google.auth._helpers.REFRESH_THRESHOLD = timedelta(seconds=20)
24+
1925

2026
class AuthClient:
2127
"""Client for retrieving authentication information."""
@@ -107,24 +113,22 @@ def fetch_google_credentials() -> Credentials:
107113
"""
108114
if AuthClient.is_ready():
109115
try:
116+
117+
def _refresh_handler(
118+
request: google.auth.transport.Request, scopes: Sequence[str]
119+
) -> tuple[str, datetime]:
120+
# We manually override the refresh_handler method with our custom logic for fetching tokens.
121+
# Previously, we directly overrode the `refresh` method. However, this
122+
# approach led to deadlock issues in gcsfs/credentials.py's maybe_refresh method.
123+
return AuthClient.fetch_google_token()
124+
110125
token, expiry = AuthClient.fetch_google_token()
111126
credentials = Credentials(
112127
token=token,
113128
expiry=expiry,
114129
token_uri="https://oauth2.googleapis.com/token",
130+
refresh_handler=_refresh_handler,
115131
)
116-
117-
def _refresh(self: Credentials, request: Any) -> None:
118-
token, expiry = AuthClient.fetch_google_token(request)
119-
self.token = token
120-
self.expiry = expiry
121-
122-
# We need to manually override the refresh method.
123-
# This is because the "Credentials" class' built-in refresh method
124-
# requires that the token be *at least valid for 3 minutes and 45 seconds*.
125-
# We cannot make this guarantee in JupyterHub due to the implementation
126-
# of our TokenExchange endpoint.
127-
credentials.refresh = partial(_refresh, credentials) # type: ignore[method-assign]
128132
except AuthError as err:
129133
err._print_warning()
130134

tests/test_gcs.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,47 @@
1+
from datetime import timedelta
2+
from unittest.mock import Mock
3+
from unittest.mock import patch
4+
5+
import pytest
6+
from gcsfs.retry import HttpError
7+
from google.auth._helpers import utcnow
8+
9+
from dapla import pandas as dp
110
from dapla.gcs import GCSFileSystem
211

312

413
def test_instance() -> None:
514
# Check that instantiation works with the current version of pyarrow
615
client = GCSFileSystem()
716
assert client is not None
17+
18+
19+
@pytest.mark.timeout(
20+
30
21+
) # Times the test out after 30 sec; this will happen if a deadlock occurs
22+
@patch("dapla.auth.AuthClient.is_ready")
23+
@patch("dapla.auth.AuthClient.fetch_google_token")
24+
def test_gcs_deadlock(mock_fetch_google_token: Mock, mock_is_ready: Mock) -> None:
25+
# When overriding the refresh method we experienced a deadlock, resulting in the credentials never being refreshed
26+
# This test checks that the credentials object is updated on refresh
27+
# and that it proceeds to the next step when a valid token is provided.
28+
29+
mock_is_ready.return_value = True # Mock client ready to not use ADC
30+
mock_fetch_google_token.side_effect = [
31+
("FakeToken1", utcnow()), # type: ignore[no-untyped-call]
32+
("FakeToken2", utcnow()), # type: ignore[no-untyped-call]
33+
("FakeToken3", utcnow()), # type: ignore[no-untyped-call]
34+
("FakeToken4", utcnow()), # type: ignore[no-untyped-call]
35+
("FakeToken5Valid", utcnow() + timedelta(seconds=30)), # type: ignore[no-untyped-call]
36+
]
37+
38+
gcs_path = "gs://ssb-dapla-pseudo-data-produkt-test/integration_tests_data/personer.parquet"
39+
with pytest.raises(
40+
HttpError
41+
) as exc_info: # Since we supply invalid credentials an error should be raised
42+
dp.read_pandas(gcs_path)
43+
assert "Invalid Credentials" in str(exc_info.value)
44+
assert (
45+
mock_fetch_google_token.call_count == 5
46+
) # mock_fetch_google_token is called as part of refresh
47+
# until a token that has not expired is returned

0 commit comments

Comments
 (0)