# electricity_prices.py
"""Daily electricity price lookup backed by CSV files in a Hugging Face dataset.

At import time the module tries to download one CSV per region and caches it
as a pandas Series indexed by ``datetime.date``.  Loading is best-effort: any
failure is reported with a printed warning and lookups for that region then
return the constant from ``BASE_ELECTRICITY_RATES``.
"""
import os
from datetime import date as Date
from datetime import datetime as DateTime
from typing import Dict, Optional, Union

import pandas as pd

try:
    from huggingface_hub import hf_hub_download
except ImportError:  # keep the module importable; lookups fall back to base rates
    hf_hub_download = None

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Private dataset repo on Hugging Face containing the CSV files
HF_DATASET_REPO = "sithuWiki/electricity"
HF_DATASET_TOKEN_ENV = "HF_DATASET_TOKEN"  # set this in your Space secrets

# Fallback / base rates used when a date is outside the CSV range
BASE_ELECTRICITY_RATES: Dict[str, float] = {
    "texas": 0.1549,
    "china": 0.08,
    "ethiopia": 0.01,
}

# Mapping from region name -> CSV filename in the private dataset
REGION_FILES: Dict[str, str] = {
    "texas": "texas_residential_daily_df.csv",
    "china": "china_electricity_prices_daily.csv",
    "ethiopia": "ethiopia_electricity_prices_daily.csv",
}

# In-memory cache: region -> pandas.Series indexed by python date with float prices
_ELECTRICITY_SERIES: Dict[str, Optional[pd.Series]] = {}


def _get_token() -> str:
    """Return the dataset read token from the environment.

    Raises:
        RuntimeError: if the variable named by ``HF_DATASET_TOKEN_ENV`` is
            unset or empty.
    """
    token = os.environ.get(HF_DATASET_TOKEN_ENV)
    if not token:
        raise RuntimeError(
            f"Environment variable {HF_DATASET_TOKEN_ENV} is not set. "
            "Add a read token for the private dataset to your Space secrets."
        )
    return token


def _series_from_frame(df: pd.DataFrame, filename: str) -> pd.Series:
    """Turn a raw price table into a sorted, de-duplicated date -> price Series.

    Rows whose date or price is missing are dropped so that a lookup falls
    back to the previous available price instead of returning NaN.  When a
    calendar day appears more than once, the row that comes last in the file
    wins.

    Raises:
        ValueError: if the 'date' or 'price' column is absent.
    """
    if "date" not in df.columns or "price" not in df.columns:
        raise ValueError(f"{filename} must contain 'date' and 'price' columns.")

    table = df[["date", "price"]].copy()
    table["date"] = pd.to_datetime(table["date"])
    table["price"] = table["price"].astype(float)
    table = table.dropna(subset=["date", "price"])

    # Normalize to python date objects before de-duplicating so that two
    # timestamps on the same day count as the same key.
    table["date"] = table["date"].dt.date
    # A stable sort keeps file order within a day, which is what makes
    # keep="last" mean "last row in the file".
    table = table.sort_values("date", kind="stable")
    table = table.drop_duplicates(subset="date", keep="last")
    return table.set_index("date")["price"]


def _load_region_series(region: str, filename: str) -> Optional[pd.Series]:
    """
    Load a single region's CSV from the private HF dataset as a Series.

    Expected columns in CSV:
      - 'date'  (any format parsable by pandas.to_datetime, e.g. '10/1/15')
      - 'price' (electricity price per kWh)

    Returns the Series indexed by ``datetime.date`` (sorted, unique), or
    ``None`` if anything goes wrong; the failure is printed, not raised.
    """
    try:
        if hf_hub_download is None:
            raise RuntimeError("huggingface_hub is not installed.")
        token = _get_token()
        file_path = hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=token,
        )
        return _series_from_frame(pd.read_csv(file_path), filename)
    except Exception as e:  # deliberate best-effort boundary: fall back to base rates
        print(f"⚠️ Could not load electricity data for {region} from {filename}: {e}")
        return None


# Load all regions at import time (one-time cost)
for _region, _fname in REGION_FILES.items():
    _ELECTRICITY_SERIES[_region] = _load_region_series(_region, _fname)


def _normalise_date(d) -> Date:
    """Coerce a str / datetime / pandas.Timestamp / date into a plain date.

    Raises:
        TypeError: if ``d`` is none of the supported types.
    """
    if isinstance(d, str):
        return pd.to_datetime(d).date()
    # datetime.datetime is a subclass of datetime.date (and pandas.Timestamp
    # is a subclass of datetime.datetime), so this check must come before the
    # plain-date one: a datetime cannot be compared with the date index.
    if isinstance(d, DateTime):
        return d.date()
    if isinstance(d, Date):
        return d
    raise TypeError(
        f"Unsupported date type {type(d)}; expected datetime.date, pandas.Timestamp, or str"
    )


def get_electricity_rate(region: str, d: Union[Date, str]) -> float:
    """
    Return the electricity rate (USD/kWh) for a given region and date.

    - If d is inside the CSV range, we use that day's price (or last
      available before d, to handle gaps).
    - If d is outside the CSV range or data is missing, we fall back
      to BASE_ELECTRICITY_RATES[region].

    Args:
        region: a key of ``BASE_ELECTRICITY_RATES``.
        d: ``datetime.date``, ``datetime.datetime``, ``pandas.Timestamp`` or a
            string parsable by ``pandas.to_datetime``; any time-of-day part
            is discarded.

    Raises:
        ValueError: if ``region`` is unknown.
        TypeError: if ``d`` has an unsupported type.
    """
    if region not in BASE_ELECTRICITY_RATES:
        raise ValueError(
            f"Unknown region '{region}'. "
            f"Expected one of {list(BASE_ELECTRICITY_RATES.keys())}"
        )

    d = _normalise_date(d)

    base_rate = BASE_ELECTRICITY_RATES[region]
    series = _ELECTRICITY_SERIES.get(region)
    if series is None or series.empty:
        return base_rate

    idx = series.index

    # Outside known range → use base constant rate
    if d < idx[0] or d > idx[-1]:
        return base_rate

    # Label slicing is inclusive, so the last element is either the exact
    # match or the most recent earlier price.  Taking it positionally also
    # stays well-defined if the index happens to repeat a date.
    on_or_before = series.loc[:d]
    if on_or_before.empty:
        return base_rate
    return float(on_or_before.iloc[-1])