Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit a923251

Browse files
committed
feat(pems_data): initial station data helper classes
- metadata and imputed aggregate 5min URLs - get district metadata - get station imputed aggregate 5min data
1 parent 30bc510 commit a923251

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import pandas as pd
2+
3+
from pems_data.s3 import S3Bucket
4+
5+
6+
class StationsBucket(S3Bucket):
    """Station-specific bucket config."""

    # S3 prefix of the imputed detector aggregates at 5-minute resolution
    # (read by Stations.get_imputed_agg_5min).
    imputation_detector_agg_5min = "imputation/detector_imputed_agg_five_minutes"
    # S3 key of the parquet file with current station metadata
    # (read by Stations.get_district_metadata).
    metadata_file = "geo/current_stations.parquet"
11+
12+
13+
class Stations:
    """Station-specific data, read from a :class:`StationsBucket`."""

    def __init__(self, bucket: "StationsBucket | None" = None):
        """Initialize the helper.

        Args:
            bucket: Bucket to read from; a fresh ``StationsBucket`` is created
                when omitted.

        The original signature used ``bucket=StationsBucket()`` — a mutable
        default argument, evaluated once at definition time and silently shared
        by every ``Stations()`` instance. Constructing the default here keeps
        each instance independent while remaining call-compatible.
        """
        self.bucket = bucket if bucket is not None else StationsBucket()

    def get_district_metadata(self, district_number: str) -> pd.DataFrame:
        """Loads metadata for all stations in the selected District from S3.

        Args:
            district_number: District value used in the parquet row filter.

        Returns:
            A DataFrame with one row per station in the district, limited to
            the metadata columns listed below.
        """
        columns = [
            "STATION_ID",
            "NAME",
            "PHYSICAL_LANES",
            "STATE_POSTMILE",
            "ABSOLUTE_POSTMILE",
            "LATITUDE",
            "LONGITUDE",
            "LENGTH",
            "STATION_TYPE",
            "DISTRICT",
            "FREEWAY",
            "DIRECTION",
            "COUNTY_NAME",
            "CITY_NAME",
        ]
        filters = [("DISTRICT", "=", district_number)]

        return self.bucket.read_parquet(self.bucket.metadata_file, columns=columns, filters=filters)

    def get_imputed_agg_5min(self, station_id: str) -> pd.DataFrame:
        """Loads imputed aggregate 5 minute data for a specific station.

        Args:
            station_id: Station value used in the parquet row filter.

        Returns:
            A DataFrame of 5-minute aggregate rows for the station.
        """
        columns = [
            "STATION_ID",
            "LANE",
            "SAMPLE_TIMESTAMP",
            "VOLUME_SUM",
            "SPEED_FIVE_MINS",
            "OCCUPANCY_AVG",
        ]
        filters = [("STATION_ID", "=", station_id)]

        return self.bucket.read_parquet(self.bucket.imputation_detector_agg_5min, columns=columns, filters=filters)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import pandas as pd
2+
3+
from pems_data.stations import Stations, StationsBucket
4+
import pytest
5+
6+
7+
class TestStationsBucket:
    """Pins the S3 path constants exposed by StationsBucket."""

    def test_imputation_detector_agg_5min(self):
        expected = "imputation/detector_imputed_agg_five_minutes"
        assert StationsBucket.imputation_detector_agg_5min == expected

    def test_metadata_file(self):
        expected = "geo/current_stations.parquet"
        assert StationsBucket.metadata_file == expected
13+
14+
15+
class TestStations:
    """Unit tests for Stations with the S3 parquet read stubbed out."""

    @pytest.fixture
    def frame(self):
        # Minimal stand-in for the DataFrame a real read would produce.
        return pd.DataFrame({"STATION_ID": [1]})

    @pytest.fixture
    def instance(self):
        return Stations()

    @pytest.fixture
    def patched_read(self, frame, mocker):
        # Stub the bucket read so no S3 access happens during the tests.
        return mocker.patch("pems_data.stations.StationsBucket.read_parquet", return_value=frame)

    def test_bucket(self, instance: Stations):
        assert isinstance(instance.bucket, StationsBucket)

    def test_get_district_metadata(self, instance: Stations, frame, patched_read):
        district_number = "7"
        result = instance.get_district_metadata(district_number)

        expected_columns = [
            "STATION_ID",
            "NAME",
            "PHYSICAL_LANES",
            "STATE_POSTMILE",
            "ABSOLUTE_POSTMILE",
            "LATITUDE",
            "LONGITUDE",
            "LENGTH",
            "STATION_TYPE",
            "DISTRICT",
            "FREEWAY",
            "DIRECTION",
            "COUNTY_NAME",
            "CITY_NAME",
        ]
        patched_read.assert_called_once_with(
            StationsBucket.metadata_file,
            columns=expected_columns,
            filters=[("DISTRICT", "=", district_number)],
        )
        assert result.equals(frame)

    def test_get_imputed_agg_5min(self, instance: Stations, frame, patched_read):
        station_id = "123"

        result = instance.get_imputed_agg_5min(station_id)

        expected_columns = [
            "STATION_ID",
            "LANE",
            "SAMPLE_TIMESTAMP",
            "VOLUME_SUM",
            "SPEED_FIVE_MINS",
            "OCCUPANCY_AVG",
        ]
        patched_read.assert_called_once_with(
            StationsBucket.imputation_detector_agg_5min,
            columns=expected_columns,
            filters=[("STATION_ID", "=", station_id)],
        )
        assert result.equals(frame)

0 commit comments

Comments
 (0)