Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit a9f3aa9

Browse files
committed
feat(pems_data/cache): helpers to get/set DataFrames
use the DataFrame serialization functions as mutators
1 parent 7662f52 commit a9f3aa9

File tree

4 files changed

+72
-8
lines changed

4 files changed

+72
-8
lines changed

pems_data/src/pems_data/cache.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
import os
33
from typing import Any, Callable
44

5+
import pandas as pd
56
import redis
67

8+
from pems_data.serialization import arrow_bytes_to_df, df_to_arrow_bytes
9+
710
logger = logging.getLogger(__name__)
811

912

@@ -76,6 +79,10 @@ def get(self, key: str, mutate_func: Callable[[Any], Any] = None) -> Any:
7679
logger.warning(f"cache unavailable to get: {key}")
7780
return None
7881

82+
def get_df(self, key: str) -> "pd.DataFrame | None":
    """Get a `pandas.DataFrame` from the cache, or None if the key doesn't exist.

    The stored value is read with `get`, using `arrow_bytes_to_df` as the
    mutator that turns the cached bytes back into a DataFrame.

    Args:
        key: The cache key to look up.

    Returns:
        The DataFrame produced by `arrow_bytes_to_df`, or None when `get`
        returns None (for example when the cache is unavailable).
    """
    # The annotation is quoted so the union syntax is never evaluated at
    # definition time, regardless of the Python version in use.
    return self.get(key, mutate_func=arrow_bytes_to_df)
85+
7986
def set(self, key: str, value: Any, mutate_func: Callable[[Any], Any] = None) -> None:
8087
"""Set a value in the cache.
8188
@@ -92,3 +99,7 @@ def set(self, key: str, value: Any, mutate_func: Callable[[Any], Any] = None) ->
9299
self.c.set(key, value)
93100
else:
94101
logger.warning(f"cache unavailable to set: {key}")
102+
103+
def set_df(self, key: str, value: pd.DataFrame) -> None:
    """Store a `pandas.DataFrame` in the cache under `key`.

    Delegates to `set`, passing `df_to_arrow_bytes` as the mutator applied
    to `value`.
    """
    serializer = df_to_arrow_bytes
    self.set(key, value, mutate_func=serializer)

tests/pytest/pems_data/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import pandas as pd
2+
import pytest
3+
4+
5+
@pytest.fixture
def df() -> pd.DataFrame:
    """Provide a small DataFrame with one integer and one string column."""
    columns = {"col1": [1, 2, 3], "col2": ["a", "b", "c"]}
    return pd.DataFrame(columns)

tests/pytest/pems_data/test_cache.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
import redis
1+
import pandas as pd
22
import pytest
3+
import redis
34

45
from pems_data.cache import Cache, redis_connection
56

@@ -107,7 +108,7 @@ def test_get(self, cache: Cache, mock_redis_connection, spy_connect):
107108
spy_connect.assert_called_once()
108109
mock_redis_connection.get.assert_called_once_with("test-key")
109110

110-
def test_get_mutate(self, cache: Cache, mock_redis_connection, spy_connect):
111+
def test_get__mutate(self, cache: Cache, mock_redis_connection, spy_connect):
111112
expected = 2
112113
mock_redis_connection.get.return_value = 1
113114

@@ -123,10 +124,60 @@ def test_set(self, cache: Cache, mock_redis_connection, spy_connect):
123124
spy_connect.assert_called_once()
124125
mock_redis_connection.set.assert_called_once_with("test-key", "test-value")
125126

126-
def test_set_mutate(self, cache: Cache, mock_redis_connection, spy_connect):
127+
def test_set__mutate(self, cache: Cache, mock_redis_connection, spy_connect):
127128
expected = 2
128129

129130
cache.set("test-key", 1, lambda v: v + 1)
130131

131132
spy_connect.assert_called_once()
132133
mock_redis_connection.set.assert_called_once_with("test-key", expected)
134+
135+
def test_get_df(self, cache: Cache, mock_redis_connection, mocker, spy_connect):
    """`get_df` reads the value by key and passes it to `arrow_bytes_to_df`."""
    payload = b"arrow-bytes"
    expected_df = mocker.Mock(spec=pd.DataFrame)
    mock_redis_connection.get.return_value = payload
    # Patch the name as it is bound inside pems_data.cache so get_df sees the stub
    deserialize = mocker.patch("pems_data.cache.arrow_bytes_to_df", return_value=expected_df)

    result = cache.get_df("df-key")

    spy_connect.assert_called_once()
    mock_redis_connection.get.assert_called_once_with("df-key")
    deserialize.assert_called_once_with(payload)
    assert result == expected_df
148+
149+
def test_set_df(self, cache: Cache, mock_redis_connection, spy_connect, df):
    """Test setting a DataFrame in the cache"""
    cache.set_df("test-key", df)

    spy_connect.assert_called_once()
    mock_redis_connection.set.assert_called_once()

    positional = mock_redis_connection.set.call_args[0]
    # First positional argument is the cache key
    assert positional[0] == "test-key"
    # Second positional argument is the serialized payload, which must be bytes
    assert isinstance(positional[1], bytes)
159+
160+
def test_set_df__empty_df(self, cache: Cache, mock_redis_connection, spy_connect):
    """An empty DataFrame is still written to the cache as bytes."""
    cache.set_df("test-key", pd.DataFrame())

    spy_connect.assert_called_once()
    mock_redis_connection.set.assert_called_once()

    # call_args unpacks into (positional args, keyword args)
    stored_args, _ = mock_redis_connection.set.call_args
    assert isinstance(stored_args[1], bytes)
168+
169+
def test_set_df__roundtrip(self, cache: Cache, mock_redis_connection, spy_connect, df, mocker):
    """A DataFrame written with `set_df` comes back equal from `get_df`."""

    def remember(key, value):
        # Whatever is written becomes what the next get() returns
        mock_redis_connection.get.return_value = value

    mock_redis_connection.set.side_effect = remember

    cache.set_df("test-key", df)
    restored = cache.get_df("test-key")

    pd.testing.assert_frame_equal(restored, df)
    # One connect call for the set, one for the get
    spy_connect.assert_has_calls([mocker.call(), mocker.call()])

tests/pytest/pems_data/test_serialization.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,6 @@
55
from pems_data.serialization import arrow_bytes_to_df, df_to_arrow_bytes
66

77

8-
@pytest.fixture
9-
def df():
10-
return pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
11-
12-
138
@pytest.fixture
149
def arrow_bytes(df):
1510
# convert df to actual arrow bytes for testing

0 commit comments

Comments
 (0)