Historical Data Export for Backtesting
Export historical data with local caching for backtesting trading strategies
import hashlib
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
def fetch_with_cache(
    dataset: str,
    start: str,
    end: str,
    cache_dir: str = ".backtest_cache",
    **kwargs,
) -> pd.DataFrame:
    """Fetch a dataset with local CSV caching to avoid repeated API calls.

    Args:
        dataset: Dataset identifier passed to the API client.
        start: Start date string (e.g. ``"2026-01-01"``), forwarded as-is.
        end: End date string, forwarded as-is.
        cache_dir: Directory for cached CSV files; created if missing.
        **kwargs: Extra query parameters forwarded to ``client.get_dataset``.
            They are also folded into the cache key, so different parameter
            sets get separate cache files.

    Returns:
        The fetched (or cached) data as a DataFrame.
    """
    cache_path = Path(cache_dir)
    # parents=True so a nested cache_dir (e.g. "out/cache") works too.
    cache_path.mkdir(parents=True, exist_ok=True)

    # Build a cache key that is stable across interpreter runs. The builtin
    # hash() is randomized per process (PYTHONHASHSEED), which made every
    # new run miss the cache; a content digest is deterministic.
    params_str = "_".join(f"{k}={v}" for k, v in sorted(kwargs.items()))
    digest = hashlib.md5(params_str.encode("utf-8")).hexdigest()[:12]
    cache_file = cache_path / f"{dataset}_{start}_{end}_{digest}.csv"

    if cache_file.exists():
        print(f"Loading from cache: {cache_file.name}")
        # NOTE(review): dtypes/timezones are whatever read_csv infers, which
        # may differ from the freshly fetched frame — confirm acceptable.
        return pd.read_csv(cache_file)

    print(f"Fetching {dataset} from {start} to {end}...")
    # NOTE(review): `client` is assumed to be an API client object defined
    # elsewhere in the surrounding document/session.
    df = client.get_dataset(dataset, start=start, end=end, **kwargs)

    # Only cache non-empty results so a transient empty response does not
    # poison the cache with a permanently empty file.
    if len(df) > 0:
        df.to_csv(cache_file, index=False)
        print(f"Cached {len(df)} rows to {cache_file.name}")
    return df
# Example: Export one week of hourly hub prices for backtesting.
# Collect the query parameters in one place, then unpack them into the call.
query_params = dict(
    start="2026-01-01",
    end="2026-01-08",
    filter_column="location",
    filter_value="HB_HOUSTON",
    resample="1 hour",
    resample_function="mean",
    timezone="America/Chicago",
)
df = fetch_with_cache("ercot_lmp_by_settlement_point", **query_params)

# Summarize what was exported.
print(f"\nExported {len(df)} hourly price observations")
print(f"Date range: {df['interval_start_local'].min()} to {df['interval_start_local'].max()}")
print(f"Price range: ${df['lmp'].min():.2f} to ${df['lmp'].max():.2f}")

Last updated
Was this helpful?

