Quota-Aware Batch Processing

Process large date ranges while respecting API quotas

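Process large date ranges while respecting API quotas. The example below fetches the range in fixed-size batches, checks the remaining row quota before each request, and stops early (keeping whatever was already retrieved) once the remaining quota drops below a safety buffer.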

import pandas as pd
from datetime import datetime, timedelta

def safe_batch_query(
    dataset: str,
    start: datetime,
    end: datetime,
    batch_days: int = 7,
    min_quota_buffer: int = 100000,
    **kwargs
) -> pd.DataFrame:
    """Fetch data in batches, checking quota before each batch.

    The range from ``start`` to ``end`` is walked in consecutive windows of
    ``batch_days`` days (the last window is clipped to ``end``). Before each
    window is requested, the remaining row quota is computed from
    ``client.get_api_usage()``; if it is below ``min_quota_buffer`` the loop
    stops and whatever has been collected so far is returned.

    NOTE(review): ``client`` is not defined in this block; it is assumed to
    be a module-level API client created elsewhere -- confirm.
    NOTE(review): consecutive windows share a boundary timestamp (one
    window's ``end`` is the next window's ``start``). Whether that can yield
    duplicate rows depends on how ``client.get_dataset`` treats ``end``
    (inclusive vs. exclusive) -- confirm against the client documentation.

    Args:
        dataset: Dataset identifier, passed through to ``client.get_dataset``.
        start: Beginning of the range to fetch.
        end: End of the range to fetch. If ``start >= end`` nothing is
            requested and an empty DataFrame is returned.
        batch_days: Length of each batch window in days. Must be positive.
        min_quota_buffer: Minimum number of remaining quota rows required to
            start another batch.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.get_dataset``.

    Returns:
        All non-empty batch results concatenated with a fresh index, or an
        empty DataFrame if no rows were retrieved.

    Raises:
        ValueError: If ``batch_days`` is zero or negative.
    """
    # A non-positive window never moves `current` toward `end`, which would
    # loop forever while issuing a quota call and a request on every pass.
    if batch_days <= 0:
        raise ValueError(f"batch_days must be positive, got {batch_days!r}")

    step = timedelta(days=batch_days)
    all_data = []
    current = start

    while current < end:
        # Check remaining quota
        usage = client.get_api_usage()
        rows_remaining = (
            usage['limits']['api_rows_returned_limit'] -
            usage['current_period_usage']['total_api_rows_returned']
        )

        if rows_remaining < min_quota_buffer:
            # Stop early but keep what was already fetched.
            print(f"Stopping: Only {rows_remaining:,} rows remaining in quota")
            break

        # Clip the final window so it never extends past the requested end.
        batch_end = min(current + step, end)
        print(f"Fetching {current.date()} to {batch_end.date()} (quota: {rows_remaining:,} remaining)...")

        df = client.get_dataset(
            dataset,
            start=current.isoformat(),
            end=batch_end.isoformat(),
            **kwargs
        )

        # Skip empty batches so they do not take part in the final concat.
        if len(df) > 0:
            all_data.append(df)
            print(f"  Retrieved {len(df):,} rows")

        current = batch_end

    if all_data:
        return pd.concat(all_data, ignore_index=True)
    return pd.DataFrame()

# Example: pull a two-week window, one week per request.
df = safe_batch_query(
    "ercot_fuel_mix",
    start=datetime(2026, 1, 1),
    end=datetime(2026, 1, 1) + timedelta(weeks=2),  # i.e. 2026-01-15
    batch_days=7,
)

# Report how many rows came back across all batches.
print(f"\nTotal rows retrieved: {len(df):,}")

Last updated

Was this helpful?