Skip to content

Commit 2f157b2

Browse files
authored
Merge pull request #890 from Lumiwealth/feature/timezone-and-sanitization-fixes
Align ThetaData fill and bump boto3 floor
2 parents 0e8e073 + 614fdec commit 2f157b2

13 files changed

+749
-199
lines changed

lumibot/backtesting/databento_backtesting_pandas.py

Lines changed: 32 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,7 @@ def get_last_price(self, asset, quote=None, exchange=None):
410410
# OPTIMIZATION: Check cache first
411411
self._check_and_clear_cache()
412412
current_dt = self.get_datetime()
413+
current_dt_aware = to_datetime_aware(current_dt)
413414

414415
# Try to get data from our cached pandas_data first
415416
search_asset = asset
@@ -435,8 +436,6 @@ def get_last_price(self, asset, quote=None, exchange=None):
435436

436437
if not df.empty and 'close' in df.columns:
437438
# Ensure current_dt is timezone-aware for comparison
438-
current_dt_aware = to_datetime_aware(current_dt)
439-
440439
# Step back one bar so only fully closed bars are visible
441440
bar_delta = timedelta(minutes=1)
442441
if asset_data.timestep == "hour":
@@ -454,19 +453,45 @@ def get_last_price(self, asset, quote=None, exchange=None):
454453
filtered_df = df[df.index <= current_dt_aware]
455454

456455
if not filtered_df.empty:
457-
last_price = filtered_df['close'].iloc[-1]
458-
if not pd.isna(last_price):
459-
price = float(last_price)
456+
valid_closes = filtered_df['close'].dropna()
457+
if not valid_closes.empty:
458+
price = float(valid_closes.iloc[-1])
460459
# OPTIMIZATION: Cache the result
461460
self._last_price_cache[cache_key] = price
462461
return price
463462

464-
# If no cached data, try to get recent data
463+
# If no cached data, try to load it for the backtest window
464+
try:
465+
fetched_bars = self.get_historical_prices(
466+
asset_separated,
467+
length=1,
468+
quote=quote_asset,
469+
timestep="minute",
470+
)
471+
if fetched_bars is not None:
472+
asset_data = self.pandas_data.get(search_asset)
473+
if asset_data is not None:
474+
df = asset_data.df
475+
if not df.empty and 'close' in df.columns:
476+
valid_closes = df[df.index <= current_dt_aware]['close'].dropna()
477+
if not valid_closes.empty:
478+
price = float(valid_closes.iloc[-1])
479+
self._last_price_cache[cache_key] = price
480+
return price
481+
except Exception as exc:
482+
logger.debug(
483+
"Attempted to hydrate Databento cache for %s but hit error: %s",
484+
asset.symbol,
485+
exc,
486+
)
487+
488+
# If still no data, fall back to direct fetch (live-style)
465489
logger.warning(f"No cached data for {asset.symbol}, attempting direct fetch")
466490
return databento_helper.get_last_price_from_databento(
467491
api_key=self._api_key,
468492
asset=asset_separated,
469-
venue=exchange
493+
venue=exchange,
494+
reference_date=current_dt_aware
470495
)
471496

472497
except DataBentoAuthenticationError as e:

lumibot/backtesting/thetadata_backtesting_pandas.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -771,7 +771,7 @@ def _update_pandas_data(self, asset, quote, length, timestep, start_dt=None):
771771
quote_columns = ['bid', 'ask', 'bid_size', 'ask_size', 'bid_condition', 'ask_condition', 'bid_exchange', 'ask_exchange']
772772
existing_quote_cols = [col for col in quote_columns if col in df.columns]
773773
if existing_quote_cols:
774-
df[existing_quote_cols] = df[existing_quote_cols].fillna(method='ffill')
774+
df[existing_quote_cols] = df[existing_quote_cols].ffill()
775775

776776
# Log how much forward filling occurred
777777
if 'bid' in df.columns and 'ask' in df.columns:

lumibot/components/options_helper.py

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -791,18 +791,11 @@ def get_expiration_on_or_after_date(self, dt: Union[date, datetime], chains: Uni
791791
self.strategy.log_message(f"Cannot validate data without underlying symbol, returning {exp_date}", color="yellow")
792792
return exp_date
793793

794-
# No future expirations with valid data; log and check last available
795-
if expiration_dates:
796-
# Check the last available expiry for data
797-
for exp_str, exp_date in reversed(expiration_dates):
798-
strikes = specific_chain.get(exp_str)
799-
if strikes and len(strikes) > 0:
800-
self.strategy.log_message(
801-
f"No valid expirations on or after {dt}; using latest available {exp_date} for {call_or_put_caps}.",
802-
color="yellow",
803-
)
804-
return exp_date
805-
794+
# No future expirations with tradeable data; let the caller skip entries gracefully.
795+
self.strategy.log_message(
796+
f"No valid expirations on or after {dt} with tradeable data for {call_or_put_caps}; skipping.",
797+
color="yellow",
798+
)
806799
return None
807800

808801
# ============================================================

lumibot/strategies/_strategy.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -133,10 +133,12 @@ def _normalize_backtest_datetime(value):
133133
"""
134134
if value is None:
135135
return None
136-
if isinstance(value, datetime.datetime) and (
137-
value.tzinfo is None or value.tzinfo.utcoffset(value) is None
138-
):
139-
return to_datetime_aware(value)
136+
if isinstance(value, datetime.datetime):
137+
tzinfo = value.tzinfo
138+
if tzinfo is None or tzinfo.utcoffset(value) is None:
139+
return to_datetime_aware(value)
140+
if not hasattr(tzinfo, "zone"):
141+
return value.astimezone(LUMIBOT_DEFAULT_PYTZ)
140142
return value
141143

142144
@property

lumibot/tools/ccxt_data_store.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -445,7 +445,7 @@ def _fill_missing_data(self, df:DataFrame, freq:str)->DataFrame:
445445
if freq == "1d":
446446
dt_range = pd.date_range(start=df.index.min(), end=df.index.max(), freq="D")
447447
else:
448-
dt_range = pd.date_range(start=df.index.min(), end=df.index.max(), freq="T")
448+
dt_range = pd.date_range(start=df.index.min(), end=df.index.max(), freq="min")
449449

450450
df_complete = df.reindex(dt_range).ffill()
451451
df_complete['missing'] = np.where(df_complete.index.isin(df.index), 0, 1)

lumibot/tools/databento_helper.py

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -947,6 +947,7 @@ def get_last_price_from_databento(
947947
api_key: str,
948948
asset: Asset,
949949
venue: Optional[str] = None,
950+
reference_date: Optional[datetime] = None,
950951
**kwargs
951952
) -> Optional[Union[float, Decimal]]:
952953
"""
@@ -978,12 +979,14 @@ def get_last_price_from_databento(
978979

979980
# For continuous futures, resolve to the current active contract
980981
if asset.asset_type == Asset.AssetType.CONT_FUTURE:
981-
# Use Asset class method to resolve continuous futures to actual contract (returns string)
982-
resolved_symbol = asset.resolve_continuous_futures_contract(year_digits=1)
982+
# Resolve based on reference date when backtesting so we match the contract in use
983+
resolved_symbol = _format_futures_symbol_for_databento(
984+
asset,
985+
reference_date=reference_date,
986+
)
983987
if resolved_symbol is None:
984988
logger.error(f"Could not resolve continuous futures contract for {asset.symbol}")
985989
return None
986-
# Generate the correct DataBento symbol format (should be single result)
987990
symbols_to_try = _generate_databento_symbol_alternatives(asset.symbol, resolved_symbol)
988991
logger.info(f"Resolved continuous future {asset.symbol} to specific contract: {resolved_symbol}")
989992
logger.info(f"DataBento symbol format for last price: {symbols_to_try[0]}")
@@ -1000,12 +1003,17 @@ def get_last_price_from_databento(
10001003
if hasattr(range_result, 'end') and range_result.end:
10011004
if hasattr(range_result.end, 'tz_localize'):
10021005
# Already a pandas Timestamp
1003-
available_end = range_result.end if range_result.end.tz else range_result.end.tz_localize('UTC')
1006+
if range_result.end.tz is not None:
1007+
available_end = range_result.end.tz_convert('UTC')
1008+
else:
1009+
available_end = range_result.end.tz_localize('UTC')
10041010
else:
10051011
# Convert to pandas Timestamp
1006-
available_end = pd.to_datetime(range_result.end).tz_localize('UTC')
1012+
ts = pd.to_datetime(range_result.end)
1013+
available_end = ts if ts.tz is not None else ts.tz_localize('UTC')
10071014
elif isinstance(range_result, dict) and 'end' in range_result:
1008-
available_end = pd.to_datetime(range_result['end']).tz_localize('UTC')
1015+
ts = pd.to_datetime(range_result['end'])
1016+
available_end = ts if ts.tz is not None else ts.tz_localize('UTC')
10091017
else:
10101018
logger.warning(f"Could not parse dataset range for {dataset}: {range_result}")
10111019
# Fallback: use a recent date that's likely to have data
@@ -1047,10 +1055,10 @@ def get_last_price_from_databento(
10471055
df = pd.DataFrame(data)
10481056

10491057
if not df.empty:
1050-
# Get the last available price (close price of most recent bar)
10511058
if 'close' in df.columns:
1052-
price = df['close'].iloc[-1]
1053-
if pd.notna(price):
1059+
closes = df['close'].dropna()
1060+
if not closes.empty:
1061+
price = closes.iloc[-1]
10541062
logger.info(f"✓ SUCCESS: Got last price for {symbol_to_use}: {price}")
10551063
return float(price)
10561064

0 commit comments

Comments
 (0)