Source code for antupy.tsg.weather

"""module for weather forecast. It should include functions for the following tasks:
- TMY loading and generation.
- MERRA2 wrapper and other open-source weather data.
- weather data generator
"""

from dataclasses import dataclass, field

import pandas as pd

from antupy.ddd_au import DIRECTORY
from antupy import Var
from antupy.tsg.settings import TimeParams
from antupy.utils.loc import Location

import os
import pandas as pd
import numpy as np
import xarray as xr

from typing import Optional, Literal, Protocol, runtime_checkable

from antupy.ddd_au import (
    DIRECTORY,
    DEFINITIONS,
    SIMULATIONS_IO
)
from antupy.utils.loc.loc_au import (
    LocationAU,
    _from_postcode
)
from antupy.utils.loc.loc_cl import LocationCL

DIR_DATA = DIRECTORY.DIR_DATA
DEFINITION_SEASON = DEFINITIONS.SEASON
LOCATIONS_METEONORM = DEFINITIONS.LOCATIONS_METEONORM
LOCATIONS_STATE = DEFINITIONS.LOCATIONS_STATE
LOCATIONS_COORDINATES = DEFINITIONS.LOCATIONS_COORDINATES
TS_WEATHER = SIMULATIONS_IO.TS_TYPES["weather"]

#--------------
DIR_METEONORM = os.path.join(DIR_DATA["weather"], "meteonorm_processed")
DIR_MERRA2 = os.path.join(DIR_DATA["weather"], "merra2_processed")
DIR_NCI = os.path.join(DIR_DATA["weather"], "nci_processed")

FILES_WEATHER = {
    "METEONORM_TEMPLATE" : os.path.join(DIR_METEONORM, "meteonorm_{:}.csv"),  #expected LOCATIONS_METEONORM
    "MERRA2" : os.path.join(DIR_MERRA2, "merra2_processed_all.nc"),
    "NCI": "",
}

_VARIABLE_RANGES = {
    "GHI" : (1000.,1000.),
    "temp_amb" : (10.0,40.0),
    "temp_mains" : (10.0,30.0),
}

# Type alias for simulation types
WeatherSimulationType = Literal["tmy", "mc", "historical", "constant_day"]
WeatherDatasetType = Literal["meteonorm", "merra2", "local", ""]
WeatherSubsetType = Literal["all", "annual", "season", "month", "date", None]



[docs] @runtime_checkable class Weather(Protocol): """ Weather generator protocol. Defines the interface for weather data generation for thermal and PV simulations. Required attributes: dataset: Source of weather data (e.g., "meteonorm", "merra2") location: Location where the simulation is performed (str or Location object) time_params: Time parameters defining the simulation period and timesteps """ dataset: str location: str | Location time_params: TimeParams def load_data(self) -> pd.DataFrame: """Load weather data based on the instance's time_params. Returns: A dataframe with the weather timeseries using time_params.idx_pd as index. """ ...
[docs] @dataclass class TMY(Weather): """ TMY (Typical Meteorological Year) weather generator. One year of data, usually with TMY files. Parameters: dataset: Source of weather data. Options: "meteonorm", "merra2". location: City where the simulation is performed. time_params: Time parameters defining the simulation period. """ dataset: str = "meteonorm" location: str | Location = field(default_factory=lambda: LocationAU("Sydney")) time_params: TimeParams = field(default_factory=TimeParams) def load_data(self) -> pd.DataFrame: """Load TMY data based on the instance's time_params.""" ts_index = self.time_params.idx_pd ts_df = pd.DataFrame(index=ts_index, columns=TS_WEATHER) return _load_tmy(ts_df, dataset=self.dataset, location=self.location, columns=TS_WEATHER)
[docs] @dataclass class WeatherMC(Weather): """ Monte Carlo weather generator. Random sample of temporal unit (e.g. days) from set (month, week, day). Parameters: dataset: Source of weather data. Options: "meteonorm", "merra2", "nci". location: City where the simulation is performed. time_params: Time parameters defining the simulation period. subset: The subset to generate data. Options: "annual", "season", "month", "date". random: Whether generates data randomly or periodically. value: The value used on subset (season name, month number, or date). """ dataset: str = "meteonorm" location: str | Location = field(default_factory=lambda: LocationAU("Sydney")) time_params: TimeParams = field(default_factory=TimeParams) subset: str | None = None random: bool = False value: str | int | None = None def load_data(self) -> pd.DataFrame: """Load Monte Carlo weather data based on the instance's time_params.""" ts_index = self.time_params.idx_pd ts_df = pd.DataFrame(index=ts_index, columns=TS_WEATHER) return _load_montecarlo(ts_df, dataset=self.dataset, location=self.location, subset=self.subset, value=self.value, columns=TS_WEATHER)
[docs] @dataclass class WeatherHist(Weather): """ Historical weather generator. Specific dates for a specific location from historical datasets. Parameters: dataset: Source of weather data. Options: "merra2", "nci", "local". location: City where the simulation is performed. time_params: Time parameters defining the simulation period. file_path: Path to the weather file location. list_dates: Set of dates to load. """ dataset: str = "merra2" location: str | Location = field(default_factory=lambda: LocationAU("Sydney")) time_params: TimeParams = field(default_factory=TimeParams) file_path: str | None = None list_dates: pd.DatetimeIndex | pd.Timestamp | None = None def load_data(self) -> pd.DataFrame: """Load historical weather data based on the instance's time_params.""" ts_index = self.time_params.idx_pd ts_df = pd.DataFrame(index=ts_index, columns=TS_WEATHER) return _load_historical(ts_df, file_path=self.file_path, columns=TS_WEATHER)
[docs] @dataclass class WeatherConstantDay(Weather): """ Constant day weather generator. Environmental variables kept constant throughout the simulation. Parameters: dataset: Source of weather data (usually empty for constant values). location: City where the simulation is performed. time_params: Time parameters defining the simulation period. random: Whether to generate random values within ranges. value: Specific constant values to use. subset: Additional subset parameter. """ dataset: str = "" location: str | Location = field(default_factory=lambda: LocationAU("Sydney")) time_params: TimeParams = field(default_factory=TimeParams) random: bool = False value: str | int | None = None subset: str | None = None def load_data(self) -> pd.DataFrame: """Load constant day weather data based on the instance's time_params.""" ts_index = self.time_params.idx_pd ts_df = pd.DataFrame(index=ts_index, columns=TS_WEATHER) return _load_day_constant_random(ts_df)
#---------- def _load_day_constant_random( timeseries: pd.DataFrame, ranges: dict[str,tuple] = _VARIABLE_RANGES, seed_id: Optional[int] = None, columns: list[str] = TS_WEATHER, ) -> pd.DataFrame: if seed_id is None: seed = np.random.SeedSequence().entropy else: seed = seed_id rng = np.random.default_rng(seed) idx = pd.to_datetime(timeseries.index) dates = np.unique(idx.date) DAYS = len(dates) df_weather_days = pd.DataFrame( index=dates, columns=columns) df_weather_days.index = pd.to_datetime(df_weather_days.index) for lbl in ranges.keys(): df_weather_days[lbl] = rng.uniform( ranges[lbl][0], ranges[lbl][1], size=DAYS, ) df_weather = df_weather_days.loc[idx.date] df_weather.index = idx timeseries[columns] = df_weather[columns] return timeseries #--------------------------------- def _random_days_from_dataframe( timeseries: pd.DataFrame, df_sample: pd.DataFrame, seed_id: Optional[int] = None, columns: Optional[list[str]] = TS_WEATHER, ) -> pd.DataFrame : if seed_id is None: seed = np.random.SeedSequence().entropy else: seed = seed_id rng = np.random.default_rng(seed) df_sample_new = df_sample.copy() df_sample_idx = pd.to_datetime(df_sample_new.index) ts_index = pd.to_datetime(timeseries.index) list_dates = np.unique(df_sample_idx.date) DAYS = len(np.unique(ts_index.date)) list_picked_dates = rng.choice( list_dates, size=DAYS ) df_sample_new["date"] = df_sample_idx.date set_picked_days = [ df_sample_new[df_sample_new["date"]==date] for date in list_picked_dates ] df_final = pd.concat(set_picked_days) df_final.index = ts_index timeseries[columns] = df_final[columns] return timeseries #--------------------------------- def from_tmy( timeseries: pd.DataFrame, TMY: pd.DataFrame, columns: Optional[list[str]] = TS_WEATHER, ) -> pd.DataFrame : rows_timeseries = len(timeseries) rows_tmy = len(TMY) if rows_tmy <= rows_timeseries: N = int( np.ceil( rows_timeseries/rows_tmy ) ) TMY_extended = pd.concat([TMY]*N, ignore_index=True) TMY_final = TMY_extended.iloc[:rows_timeseries] else: TMY_final = TMY.iloc[:rows_timeseries] TMY_final.index = timeseries.index timeseries[columns] = TMY_final[columns] return timeseries # ------------- def _load_tmy( ts: pd.DataFrame, params: dict | None = None, *, dataset: str | None = None, location: str | Location | None = None, columns: list[str] | None = TS_WEATHER, ) -> pd.DataFrame: # Handle both dict params and keyword arguments if params is not None: # Legacy dict-based interface dataset = params["dataset"] location = params["location"] elif dataset is None or location is None: raise ValueError("Either params dict or dataset+location keywords must be provided") YEAR = pd.to_datetime(ts.index).year[0] # At this point, dataset and location are guaranteed to be not None assert dataset is not None and location is not None # Convert Location objects to string for processing location_str = str(location) if not isinstance(location, str) else location if dataset == "meteonorm": df_dataset = _load_dataset_meteonorm(location_str, YEAR) elif dataset == "merra2": # For MERRA2, convert LocationCL to LocationAU if needed, or pass as is if compatible if isinstance(location, LocationCL): # Convert to string representation for MERRA2 location_for_merra2 = str(location) else: location_for_merra2 = location df_dataset = _load_dataset_merra2(ts, location_for_merra2, YEAR) # type: ignore else: raise ValueError(f"dataset: {dataset} is not available.") return from_tmy( ts, df_dataset, columns=columns ) def _load_dataset_meteonorm( location: str, YEAR: int = 2022, START: int = 0, STEP: int = 3, ) -> pd.DataFrame: if location not in DEFINITIONS.LOCATIONS_METEONORM: raise ValueError(f"location {location} not in available METEONORM files") df_dataset = pd.read_csv( os.path.join( DIR_METEONORM, FILES_WEATHER["METEONORM_TEMPLATE"].format(location), ), index_col=0 ) PERIODS = len(df_dataset) start_time = pd.to_datetime(f"{YEAR}-01-01 00:00:00") + pd.DateOffset(hours=START) df_dataset.index = pd.date_range( start=start_time, periods=PERIODS, freq=f"{STEP}min") df_dataset["date"] = df_dataset.index df_dataset["date"] = df_dataset["date"].apply(lambda x: x.replace(year=YEAR)) df_dataset = df_dataset.set_index(pd.to_datetime(df_dataset["date"])) return df_dataset def _load_dataset_merra2( ts: pd.DataFrame, location: LocationAU | str | tuple | int, YEAR: int, STEP:int = 5, file_dataset:str = FILES_WEATHER["MERRA2"], ) -> pd.DataFrame: if isinstance(location, int): #postcode (lon,lat) = _from_postcode(location, get="coords") elif isinstance(location,str): #city loc = LocationAU(location) (lon,lat) = (loc.lon, loc.lat) elif isinstance(location, tuple): #(longitude, latitude) tuple (lon,lat) = (location) elif isinstance(location, LocationAU): (lon,lat) = (location.lon, location.lat) else: raise ValueError(f"location {location} not in available format.") data_weather = xr.open_dataset(file_dataset) lons = np.array(data_weather.lon) lats = np.array(data_weather.lat) lon_a = lons[(abs(lons-lon)).argmin()] lat_a = lats[(abs(lats-lat)).argmin()] df_w = data_weather.sel(lon=lon_a,lat=lat_a).to_dataframe() df_w.index = pd.to_datetime(df_w.index).tz_localize('UTC') tz = 'Australia/Brisbane' df_w.index = df_w.index.tz_convert(tz) df_w.index = df_w.index.tz_localize(None) df_w.rename(columns={'SWGDN':'GHI','T2M':'Temp_Amb'},inplace=True) df_w = df_w[['GHI','Temp_Amb']].copy() df_w = df_w.resample(f"{STEP}T").interpolate() #Getting the data in half hours ts["GHI"] = df_w["GHI"] ts["Temp_Amb"] = df_w["Temp_Amb"] - 273.15 ######################################### #Replace later for the closest city df_aux = _load_dataset_meteonorm("Sydney", YEAR) df_aux = df_aux.resample(f"{STEP}T").interpolate() #Getting the data in half hours ts["Temp_Mains"] = df_aux["Temp_Mains"] ######################################### return ts #---------- def _load_montecarlo( ts: pd.DataFrame, params: dict | None = None, *, dataset: str | None = None, location: str | Location | None = None, subset: str | None = None, value: str | int | None = None, columns: Optional[list[str]] = TS_WEATHER, ) -> pd.DataFrame: # Handle both dict params and keyword arguments if params is not None: # Legacy dict-based interface dataset = params["dataset"] location = params["location"] subset = params["subset"] value = params["value"] elif any(x is None for x in [dataset, location, subset]): raise ValueError("Either params dict or dataset+location+subset keywords must be provided") # Convert Location objects to string for processing location_str = str(location) if not isinstance(location, str) else location ts_index = pd.to_datetime(ts.index) # At this point, required parameters are guaranteed to be not None assert dataset is not None and location is not None and subset is not None if dataset == "meteonorm": df_dataset = _load_dataset_meteonorm(location_str) elif dataset == "merra2": # For MERRA2, ensure location is in correct format if isinstance(location, LocationAU): location_for_merra2 = location elif isinstance(location, LocationCL) or hasattr(location, 'value'): # Convert non-AU Location objects to string representation for MERRA2 location_for_merra2 = str(location) else: location_for_merra2 = location # Type assertion to help type checker since we've converted to compatible types df_dataset = _load_dataset_merra2(ts, location_for_merra2, ts_index.year[0]) # type: ignore else: raise ValueError(f"dataset: {dataset} is not available.") df_dataset.index = pd.to_datetime(df_dataset.index) if subset == 'annual': df_sample = df_dataset[ df_dataset.index.year==value ] elif subset == 'season': # value should be a string for season if not isinstance(value, str): raise ValueError(f"For season subset, value must be a string, got {type(value)}") df_sample = df_dataset[ df_dataset.index.isin(DEFINITION_SEASON[value]) ] elif subset == 'month': # value should be an int for month if not isinstance(value, int): raise ValueError(f"For month subset, value must be an int, got {type(value)}") df_sample = df_dataset[ df_dataset.index.month==value ] elif subset == 'date': # value should have a date() method (datetime/Timestamp) if not hasattr(value, 'date'): raise ValueError(f"For date subset, value must be a datetime object, got {type(value)}") df_sample = df_dataset[ df_dataset.index.date==value.date() # type: ignore ] else: raise ValueError(f"subset: {subset} not in available options.") df_weather = _random_days_from_dataframe( ts, df_sample, columns=columns ) return df_weather #---------------- def _load_historical( ts: pd.DataFrame, params: dict | None = None, *, file_path: str | None = None, columns: Optional[list[str]] = TS_WEATHER, ) -> pd.DataFrame: # Handle both dict params and keyword arguments if params is not None: # Legacy dict-based interface file_path = params["file_path"] elif file_path is None: raise ValueError("Either params dict or file_path keyword must be provided") # At this point, file_path is guaranteed to be not None assert file_path is not None ts_ = pd.read_csv(file_path, index_col=0) ts_.index = pd.to_datetime(ts.index) return ts_ def main(): from antupy.tsg.settings import TimeParams tp = TimeParams(YEAR=Var(2020,"-"), STEP=Var(30,"min")) #---------------- # TMY with Meteonorm tmy_weather = TMY(dataset="meteonorm", location="Sydney", time_params=tp) ts_tmy = tmy_weather.load_data() print("TMY Meteonorm:", ts_tmy[TS_WEATHER]) #---------------- # TMY with MERRA2 location = LocationAU(2035) tp2 = TimeParams(YEAR=Var(2020,"-"), STEP=Var(30,"min")) tmy_weather_merra = TMY(dataset="merra2", location=str(location), time_params=tp2) ts_tmy_merra = tmy_weather_merra.load_data() print("TMY MERRA2:", ts_tmy_merra[TS_WEATHER]) #---------------- # Monte Carlo mc_weather = WeatherMC( dataset="meteonorm", location=str(LocationAU(2035)), time_params=tp, subset="month", value=5 ) ts_mc = mc_weather.load_data() print("Monte Carlo:", ts_mc[TS_WEATHER]) #---------------- # Constant day constant_weather = WeatherConstantDay(time_params=tp) ts_constant = constant_weather.load_data() print("Constant Day:", ts_constant[TS_WEATHER]) return if __name__ == "__main__": main() pass