Historical Data Export for Backtesting

Export historical data with local caching for backtesting trading strategies.

import hashlib
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

def fetch_with_cache(
    dataset: str,
    start: str,
    end: str,
    cache_dir: str = ".backtest_cache",
    **kwargs,
) -> pd.DataFrame:
    """Fetch data with local CSV caching to avoid repeated API calls.

    Args:
        dataset: Dataset identifier passed to ``client.get_dataset``.
        start: Start of the query window, as a string (e.g. "2026-01-01").
        end: End of the query window, as a string.
        cache_dir: Directory for cached CSV files; created if missing
            (including parents).
        **kwargs: Extra query parameters forwarded to ``client.get_dataset``
            and folded into the cache key.

    Returns:
        The fetched (or cache-loaded) data as a DataFrame.

    Note:
        Round-tripping through CSV loses dtype information (e.g. datetime
        columns come back as strings) — confirm downstream code tolerates
        this, or parse dates when loading.
    """
    cache_path = Path(cache_dir)
    # parents=True so nested cache dirs (e.g. "out/cache") also work.
    cache_path.mkdir(parents=True, exist_ok=True)

    # Build a deterministic cache key. The built-in hash() is salted
    # per-process (PYTHONHASHSEED), so it yields a different filename on
    # every run and would silently defeat the cache; use a stable digest.
    params_str = "_".join(f"{k}={v}" for k, v in sorted(kwargs.items()))
    digest = hashlib.md5(params_str.encode("utf-8")).hexdigest()
    cache_file = cache_path / f"{dataset}_{start}_{end}_{digest}.csv"

    if cache_file.exists():
        print(f"Loading from cache: {cache_file.name}")
        return pd.read_csv(cache_file)

    print(f"Fetching {dataset} from {start} to {end}...")
    # NOTE(review): `client` is assumed to be an API client defined
    # elsewhere in the surrounding script/notebook — confirm.
    df = client.get_dataset(dataset, start=start, end=end, **kwargs)

    # Only cache non-empty results so a transient empty response does not
    # poison the cache with a permanently-empty file.
    if len(df) > 0:
        df.to_csv(cache_file, index=False)
        print(f"Cached {len(df)} rows to {cache_file.name}")

    return df

# Example: Export one week of hourly hub prices for backtesting.
# Query parameters are collected in a dict and splatted into the call so
# the filter/resample settings are easy to tweak in one place.
query_params = {
    "filter_column": "location",
    "filter_value": "HB_HOUSTON",
    "resample": "1 hour",
    "resample_function": "mean",
    "timezone": "America/Chicago",
}
df = fetch_with_cache(
    "ercot_lmp_by_settlement_point",
    start="2026-01-01",
    end="2026-01-08",
    **query_params,
)

# Quick sanity summary of what was exported.
print(f"\nExported {len(df)} hourly price observations")
print(f"Date range: {df['interval_start_local'].min()} to {df['interval_start_local'].max()}")
print(f"Price range: ${df['lmp'].min():.2f} to ${df['lmp'].max():.2f}")

Last updated

Was this helpful?