# Source code for antupy.analyser.par

# -*- coding: utf-8 -*-
"""
Enhanced Parametric analysis module with ap.Frame integration.

This module provides the Parametric class for setting up and running
parametric studies with full unit tracking using ap.Frame instead of pd.DataFrame.
"""

import os
import copy
import itertools
import pickle
from collections.abc import Iterable, Mapping
from pathlib import Path
from typing import Any, overload

import numpy as np

from antupy import Simulation, Plant, Var, Array, Frame

ParameterValue = Array | Iterable[str | int | float]
SimulationType = Simulation | Plant


class Parametric:
    """
    Enhanced parametric analysis manager with full unit tracking.

    This class handles the setup, execution, and management of parametric
    studies where multiple input parameters are varied systematically to
    explore their effects on simulation outputs. Results are stored in a
    Frame.

    Parameters
    ----------
    base_case : Simulation or Plant
        Base simulation or plant object used as template for all runs.
        It is deep-copied per case only when ``isolation_mode='deepcopy'``;
        in ``'reuse'`` mode the same instance is mutated for every case.
    params_in : dict[str, Array or Iterable]
        Dictionary mapping parameter names to their value ranges.
        Keys are parameter names (dot notation addresses nested attributes).
        Values can be Array objects with units or iterables of values.
    params_out : list[str] or None, optional
        Output parameter names to extract from ``sim.out``. If None or
        empty (default), the keys of ``sim.out`` are used as outputs.
    save_results_detailed : bool, optional
        Whether to pickle each simulation object into ``dir_output``.
        Default is False. Has no effect when ``dir_output`` is None.
    dir_output : Path, str, or None, optional
        Directory for detailed simulation results. Created automatically
        by run_analysis() if it doesn't exist. Default is None.
    path_results : Path, str, or None, optional
        File path for the summary CSV, rewritten after each simulation.
        Enables incremental saving for long-running studies.
        Default is None.
    include_gitignore : bool, optional
        Whether to create a .gitignore file in ``dir_output`` that ignores
        pickle files (``*.plk``, ``*.pkl``, ``*.pickle``). Default is False.
        Only applies when ``dir_output`` is specified.
    isolation_mode : str, optional
        Simulation isolation mode. Options:

        - 'reuse' (default): reuses the same base_case instance across
          simulations.
        - 'deepcopy': each simulation uses a deep copy of base_case.
          Use it if run_simulation() has side effects or modifies
          internal state.
    verbose : bool, optional
        Whether to print progress information during analysis.
        Default is True.

    Attributes
    ----------
    base_case : Simulation or Plant
        The base simulation template object.
    params_in : dict[str, Array or Iterable]
        Input parameters dictionary as provided during initialization.
    params_out : list[str]
        Output parameters to extract from simulation results.
    save_results_detailed : bool
        Flag indicating whether to save detailed simulation objects.
    dir_output : Path or None
        Directory for saving detailed simulation results.
    path_results : Path or None
        File path for saving incremental CSV results.
    verbose : bool
        Flag for progress information printing.
    cases : Frame or None
        Frame containing all input parameter combinations to analyze.
        Created by setup_cases() with units preserved.
    results : Frame or None
        Complete results Frame with input parameters and output metrics.
        Available after a successful run_analysis().

    Raises
    ------
    ValueError
        If ``isolation_mode`` is not 'deepcopy' or 'reuse'.

    Examples
    --------
    Basic parametric study with units preserved:

    >>> from antupy.analyser.par import Parametric
    >>> from antupy.array import Array
    >>>
    >>> # Define parameter ranges with units
    >>> params_in = {
    ...     'temperature': Array([20, 25, 30], '°C'),
    ...     'flow_rate': Array([0.1, 0.2, 0.3], 'm3/s')
    ... }
    >>>
    >>> # Create and run analysis
    >>> study = Parametric(
    ...     base_case=my_simulation,
    ...     params_in=params_in,
    ...     params_out=['efficiency', 'cost']
    ... )
    >>> results = study.run_analysis()  # Returns ap.Frame with units
    >>>
    >>> # Access results with units
    >>> efficiency_array = results.get_values('efficiency')  # Array with units
    >>> all_outputs = results.get_values(['efficiency', 'cost'])  # Dict of Arrays
    """

    def __init__(
        self,
        base_case: SimulationType,
        params_in: Mapping[str, ParameterValue],
        params_out: list[str] | None = None,
        save_results_detailed: bool = False,
        dir_output: Path | str | None = None,
        path_results: Path | str | None = None,
        include_gitignore: bool = False,
        isolation_mode: str = 'reuse',
        verbose: bool = True
    ):
        # Validate first so an invalid object is never half-initialised.
        if isolation_mode not in ('deepcopy', 'reuse'):
            raise ValueError(f"isolation_mode must be 'deepcopy' or 'reuse', got '{isolation_mode}'")

        self.base_case = base_case
        self.params_in = params_in
        # A fresh list per instance: a literal [] default would be shared
        # by every Parametric created without an explicit params_out.
        self.params_out = params_out if params_out is not None else []
        self.save_results_detailed = save_results_detailed
        self.include_gitignore = include_gitignore
        self.isolation_mode = isolation_mode
        self.verbose = verbose

        # Convert paths to Path objects
        self.dir_output = Path(dir_output) if dir_output is not None else None
        self.path_results = Path(path_results) if path_results is not None else None

        # Internal state - using Frame instead of DataFrame
        self.cases: Frame | None = None
        self.results: Frame | None = None

    def setup_cases(
        self,
        params_in: Mapping[str, ParameterValue]
    ) -> Frame:
        """
        Create parametric run matrix from input parameters as ap.Frame.

        Generates all combinations of input parameters using the Cartesian
        product (``itertools.product``), so the first parameter varies
        slowest and the last parameter varies fastest. The result is also
        stored in ``self.cases``.

        Parameters
        ----------
        params_in : dict[str, Array or Iterable]
            Dictionary mapping parameter names to their value ranges.
            Array values contribute their unit; any other iterable gets
            the empty unit ``""``.

        Returns
        -------
        cases : Frame
            Frame with one row per parameter combination and one column
            per key of ``params_in``. An empty Frame when ``params_in``
            has no keys.

        Examples
        --------
        Basic parameter setup with units:

        >>> params = {
        ...     'temp': Array([20, 30], 'K'),
        ...     'size': ['small', 'large']
        ... }
        >>> cases = study.setup_cases(params)
        >>> print(cases.units)
        {'temp': 'K', 'size': ''}
        """
        cols_in = list(params_in.keys())

        # Handle empty parameters case
        if not cols_in:
            self.cases = Frame()
            return self.cases

        params_values = []
        params_units = []
        for values in params_in.values():
            if isinstance(values, Array):
                # Extract values and unit from Array
                params_values.append(values.value.tolist())
                params_units.append(values.u)
            else:
                # Plain iterable (list, tuple, etc.): no unit
                params_values.append(list(values))
                params_units.append("")

        # Create Frame with units
        self.cases = Frame(
            list(itertools.product(*params_values)),
            columns=cols_in,
            units=params_units
        )
        return self.cases

    def _extract_outputs(
        self,
        sim: SimulationType,
        params_out: list[str]
    ) -> tuple[list[float | str], list[str]]:
        """
        Extract output values and units from simulation object.

        Supports extraction from Var, Array, strings and plain numeric
        types. For Array objects with multiple values, the mean is stored.

        Parameters
        ----------
        sim : Simulation or Plant
            Simulation object with ``.out`` mapping containing results.
        params_out : list[str]
            List of parameter names to extract from ``sim.out``.

        Returns
        -------
        tuple[list[float | str], list[str]]
            Values (floats, or strings for non-numeric outputs) and the
            corresponding units; ``""`` when the output carries no unit.

        Raises
        ------
        KeyError
            If a requested parameter is not present in ``sim.out``.

        Examples
        --------
        Extract from simulation with mixed output types:

        >>> sim.out = {
        ...     'efficiency': Var(0.85, '-'),
        ...     'power': Array([1000, 1100], 'W'),
        ...     'count': 42
        ... }
        >>> values, units = self._extract_outputs(sim, ['efficiency', 'power', 'count'])
        >>> # Returns: ([0.85, 1050.0, 42.0], ['-', 'W', ''])
        """
        values = []
        units = []

        for param in params_out:
            if param not in sim.out:
                raise KeyError(f"Parameter '{param}' not found in sim.out")

            output = sim.out[param]

            if isinstance(output, Var):
                # Extract value and unit from Var
                values.append(float(output.v))
                units.append(output.u)
            elif isinstance(output, Array):
                # Extract value and unit from Array (use mean for multiple values)
                if len(output.value) == 1:
                    values.append(float(output.value[0]))
                else:
                    values.append(float(np.mean(output.value)))
                units.append(output.u)
            elif isinstance(output, str):
                # Handle string values (like status)
                values.append(output)
                units.append("")
            else:
                # Handle plain numbers (int, float)
                try:
                    values.append(float(output))
                except (ValueError, TypeError):
                    # If can't convert to float, store as string
                    values.append(str(output))
                units.append("")

        return values, units

    @staticmethod
    def _new_results_frame(cases_in: Frame, params_out: list[str]) -> Frame:
        """Build a results Frame: input cases plus NaN-filled output columns."""
        data = cases_in.values.copy()
        columns = list(cases_in.columns)
        units = list(cases_in.units.values())

        if params_out:
            placeholders = np.full((len(cases_in), len(params_out)), np.nan)
            data = np.column_stack([data, placeholders])
            columns.extend(params_out)
            # Real output units are only known after the first simulation.
            units.extend([""] * len(params_out))

        return Frame(data, columns=columns, units=units)

    def _write_gitignore(self) -> None:
        """Create ``dir_output/.gitignore`` for pickle files unless it exists."""
        gitignore_path = self.dir_output / '.gitignore'
        if gitignore_path.exists():
            return
        gitignore_content = (
            "# Ignore pickle files (simulation results)\n"
            "*.plk\n"
            "*.pkl\n"
            "*.pickle\n"
        )
        with open(gitignore_path, 'w', encoding='utf-8') as f:
            f.write(gitignore_content)

    def run_analysis(self) -> Frame:
        """
        Execute parametric analysis with configured input parameters.

        Creates all parameter combinations using setup_cases() and runs a
        simulation for each one, collecting the requested output metrics.
        When ``params_out`` is empty, the keys of ``sim.out`` from the
        first simulation define the output columns.

        Side effects: creates ``dir_output`` (and optionally a .gitignore)
        when configured, writes ``sim_<index>.plk`` pickles when
        ``save_results_detailed`` is set, and rewrites ``path_results``
        as CSV after every case.

        Returns
        -------
        Frame
            Complete results Frame with input parameters and output
            metrics. Input columns keep the units from params_in; output
            columns get the unit seen the first time each output was
            extracted. Also stored in ``self.results``.

        Raises
        ------
        KeyError
            If any parameter in params_out is not found in ``sim.out``.
        Exception
            Any exception raised by ``run_simulation`` propagates to the
            caller; nothing is caught here.

        Examples
        --------
        Run analysis and access results with units:

        >>> study = Parametric(
        ...     base_case=simulation,
        ...     params_in={'temp': Array([20, 30], 'K')},
        ...     params_out=['efficiency', 'power_output']
        ... )
        >>> results = study.run_analysis()
        >>>
        >>> # Get values with units preserved
        >>> efficiency_array = results.get_values('efficiency')
        >>> all_results = results.get_values()
        >>>
        >>> # Access units
        >>> print(results.units)
        >>> print(results.unit('efficiency'))
        """
        # Setup cases from input parameters
        cases_in = self.setup_cases(self.params_in)
        n_cases = len(cases_in)

        # Unit of each output, recorded the first time the output is seen
        output_units: dict[str, str] = {}

        # With explicit params_out the layout is known up front; otherwise
        # it is discovered from the first simulation's sim.out.
        results: Frame | None = None
        detected_params_out = None
        if self.params_out:
            results = self._new_results_frame(cases_in, self.params_out)

        # Create output directory if needed
        if self.dir_output is not None:
            self.dir_output.mkdir(parents=True, exist_ok=True)
            if self.include_gitignore:
                self._write_gitignore()

        # Run simulations
        for index in range(n_cases):
            if self.verbose:
                print(f'RUNNING SIMULATION {index + 1}/{len(cases_in)}')

            # Get input parameters for this case
            row = cases_in.iloc[index]
            case_params = {col: row[col] for col in cases_in.columns}

            # Create simulation copy (or reuse the base case) and update parameters
            if self.isolation_mode == 'deepcopy':
                sim = copy.deepcopy(self.base_case)
            else:  # 'reuse' mode
                sim = self.base_case
            self._update_parameters(sim, case_params, cases_in.units)

            # Run simulation
            sim.run_simulation(verbose=self.verbose)

            # Determine output parameters (auto-detection on first run)
            if not self.params_out:
                params_out = list(sim.out.keys())
                if detected_params_out is None:
                    detected_params_out = params_out
                    # Now we know the structure, create the results Frame
                    results = self._new_results_frame(cases_in, params_out)
            else:
                params_out = self.params_out
                missing = [k for k in params_out if k not in sim.out]
                if missing:
                    raise KeyError(f"The following params_out are not in sim.out: {missing}")

            # Extract outputs with units
            values_out, units_out = self._extract_outputs(sim, params_out)

            # Store values in results
            for param, value in zip(params_out, values_out):
                results.loc[index, param] = value

            # Update output units tracking (first time we see each output)
            for param, unit in zip(params_out, units_out):
                output_units.setdefault(param, unit)

            # Save detailed results if requested
            if self.save_results_detailed and self.dir_output:
                pickle_path = self.dir_output / f'sim_{index}.plk'
                with open(pickle_path, "wb") as file:
                    pickle.dump(sim, file, protocol=pickle.HIGHEST_PROTOCOL)

            # Save intermediate results to CSV if requested
            if self.path_results is not None:
                results.to_csv(self.path_results)

            if self.verbose:
                print(f"Case {index + 1} completed: {dict(zip(params_out, values_out))}")

        # No simulation ran and no explicit outputs: results is inputs only.
        if results is None:
            results = self._new_results_frame(cases_in, [])

        # Resolve the final unit of every column
        all_units = {}
        for col in results.columns:
            if col in cases_in.columns:
                all_units[col] = cases_in.unit(col)[col]
            elif col in output_units:
                all_units[col] = output_units[col]
            else:
                all_units[col] = ""

        # Create final Frame with correct units
        final_results = Frame(
            results.values,
            columns=results.columns,
            units=[all_units[col] for col in results.columns]
        )

        self.results = final_results
        return final_results

    def _update_parameters(
        self,
        simulation: SimulationType,
        case_params: dict[str, Any],
        input_units: dict[str, str]
    ) -> None:
        """
        Update simulation object parameters with values from a case.

        Supports both direct attributes and nested attributes using dot
        notation; missing intermediate objects are created as empty
        placeholder instances. A parameter with a non-empty unit is set
        as ``Var(value, unit)``, otherwise the plain value is set.

        Afterwards, if the simulation exposes both ``_component_cache``
        and ``_param_hash_cache`` and at least one parameter changed,
        those caches are cleared (Plant instances only) and
        ``__post_init__`` is called when present. Objects without those
        caches always get ``__post_init__`` called when present.

        Parameters
        ----------
        simulation : Simulation or Plant
            Simulation object to update (mutated in place).
        case_params : dict[str, Any]
            Dictionary mapping parameter names to values for this case.
        input_units : dict[str, str]
            Dictionary mapping parameter names to their units.

        Examples
        --------
        Update simulation parameters:

        >>> case_params = {'temperature': 25.0, 'subsystem.pressure': 2.0}
        >>> input_units = {'temperature': '°C', 'subsystem.pressure': 'bar'}
        >>> self._update_parameters(sim, case_params, input_units)
        >>> # sim.temperature becomes Var(25.0, '°C')
        >>> # sim.subsystem.pressure becomes Var(2.0, 'bar')
        """
        changed_params = set()

        for param_name, value in case_params.items():
            unit = input_units.get(param_name, "")

            # Walk to the object owning the final attribute. A plain name
            # yields a single part, so the owner is the simulation itself.
            *parents, final_attr = param_name.split('.')
            owner = simulation
            for part in parents:
                if not hasattr(owner, part):
                    # Create intermediate object if it doesn't exist
                    setattr(owner, part, type('', (), {})())
                owner = getattr(owner, part)

            # A freshly created placeholder has no attributes, so the old
            # value is None exactly when the path did not exist before.
            old_value = getattr(owner, final_attr, None)

            # Create Var object if unit is specified, plain value otherwise
            new_value = Var(value, unit) if unit else value
            setattr(owner, final_attr, new_value)

            # Check if value actually changed for smart component tracking
            if self._param_values_different(old_value, new_value):
                changed_params.add(param_name)

        has_post_init = hasattr(simulation, '__post_init__')
        has_caches = (
            hasattr(simulation, '_component_cache')
            and hasattr(simulation, '_param_hash_cache')
        )

        if has_caches:
            # If no parameters changed, skip both invalidation and __post_init__
            if changed_params:
                # For Plant, clear caches to force component recreation
                if isinstance(simulation, Plant):
                    simulation._component_cache.clear()
                    simulation._param_hash_cache.clear()
                # Still recompute any plant-level derived parameters
                if has_post_init:
                    simulation.__post_init__()
        elif has_post_init:
            # Fallback for objects without component caches
            simulation.__post_init__()

    def _param_values_different(self, old_value, new_value) -> bool:
        """
        Check if parameter values are actually different.

        Objects exposing ``gv`` are treated as Var-like and compared by
        ``gv()`` and ``unit``. A Var-like vs. non-Var-like pair always
        counts as different. Anything else is compared with ``!=``; if
        that fails or has no single truth value (e.g. array operands),
        the string representations are compared instead.
        """
        old_is_var = hasattr(old_value, 'gv')
        new_is_var = hasattr(new_value, 'gv')

        # Handle Var objects
        # NOTE(review): other methods read the unit as `.u`; confirm Var
        # also exposes `.unit`.
        if old_is_var and new_is_var:
            return (old_value.gv() != new_value.gv() or
                    old_value.unit != new_value.unit)

        # Handle case where one is Var and other is not
        if old_is_var != new_is_var:
            return True

        # Handle regular values
        try:
            # bool() forces array-like comparison results to fail here,
            # inside the try, instead of at the caller's `if`.
            return bool(old_value != new_value)
        except Exception:
            # Fallback for complex objects
            return str(old_value) != str(new_value)

    @overload
    def get_output_arrays(self, cols: str) -> Array: ...

    @overload
    def get_output_arrays(self, cols: list[str]) -> dict[str, Array]: ...

    @overload
    def get_output_arrays(self, cols: None = None) -> dict[str, Array]: ...

    def get_output_arrays(self, cols: str | list[str] | None = None) -> dict[str, Array] | Array:
        """
        Get analysis results as Array objects with units preserved.

        Thin wrapper around ``self.results.get_values(cols)``.

        Parameters
        ----------
        cols : str, list[str], or None
            Column name(s) to return as Arrays. If None, returns all columns.

        Returns
        -------
        dict[str, Array] or Array
            If cols is str: single Array object.
            If cols is list or None: dict mapping column names to Array objects.

        Raises
        ------
        ValueError
            If no results are available (run_analysis() not called yet).
        """
        if self.results is None:
            raise ValueError("No results available. Run run_analysis() first.")

        return self.results.get_values(cols)

    def get_summary(self) -> dict[str, Any]:
        """
        Generate summary of parametric analysis results.

        Returns
        -------
        dict[str, Any]
            ``{"status": ...}`` when no analysis has run or no cases are
            defined. Otherwise: ``total_cases``, ``input_parameters``,
            ``output_parameters``, ``completed``, ``input_units``,
            ``output_units`` and, for every output column with at least
            one non-NaN numeric value, a ``"<col>_stats"`` dict with
            mean, std, min, max and unit. Non-numeric outputs (e.g.
            status strings) get no stats entry.

        Examples
        --------
        Get analysis summary:

        >>> summary = study.get_summary()
        >>> print(f"Completed {summary['total_cases']} cases")
        >>> print(f"Input units: {summary['input_units']}")
        >>> print(f"Efficiency stats: {summary['efficiency_stats']}")
        """
        if self.results is None:
            return {"status": "No analysis completed"}

        if self.cases is None or len(self.cases) == 0:
            return {"status": "No cases defined"}

        # Basic information
        summary = {
            "total_cases": len(self.results),
            "input_parameters": [col for col in self.results.columns if col in self.cases.columns],
            "output_parameters": [col for col in self.results.columns if col not in self.cases.columns],
            "completed": True
        }

        # Units information
        summary["input_units"] = {
            col: self.results.unit(col)[col] for col in summary["input_parameters"]
        }
        summary["output_units"] = {
            col: self.results.unit(col)[col] for col in summary["output_parameters"]
        }

        # Statistical summaries for output parameters
        for col in summary["output_parameters"]:
            data = self.results[col].dropna()  # Remove NaN values
            if len(data) == 0:
                continue
            try:
                stats = {
                    "mean": float(data.mean()),
                    "std": float(data.std()),
                    "min": float(data.min()),
                    "max": float(data.max()),
                }
            except (TypeError, ValueError):
                # Outputs may legitimately be strings; they have no statistics.
                continue
            stats["unit"] = summary["output_units"][col]
            summary[f"{col}_stats"] = stats

        return summary
def main() -> None:
    """Module entry point; currently a placeholder that does nothing."""
    return None


if __name__ == "__main__":
    main()