# Source code for antupy.analyser.par

# -*- coding: utf-8 -*-
"""
Enhanced Parametric analysis module with ap.Frame integration.

This module provides the Parametric class for setting up and running
parametric studies with full unit tracking using ap.Frame instead of pd.DataFrame.
"""

import os
import copy
import itertools
import pickle
from collections.abc import Iterable, Mapping
from pathlib import Path
from typing import Any, overload

import numpy as np

from antupy import Simulation, Plant, Var, Array, Frame

ParameterValue = Array | Iterable[str | int | float]
SimulationType = Simulation | Plant



# [docs]
class Parametric:
    """
    Enhanced parametric analysis manager with full unit tracking.

    This class handles the setup, execution, and management of parametric
    studies where multiple input parameters are varied systematically to
    explore their effects on simulation outputs. Results are stored in a Frame.
    
    Parameters
    ----------
    base_case : Simulation or Plant
        Base simulation or plant object to use as template for all parametric runs.
        With ``isolation_mode='reuse'`` (the default) this very object is updated
        and run for every case, so it is modified by the study. With
        ``isolation_mode='deepcopy'`` it is deep-copied for each simulation case.
    params_in : dict[str, Array or Iterable]
        Dictionary mapping parameter names to their value ranges.
        Keys are parameter names (support dot notation for nested attributes).
        Values can be Array objects with units or iterables of values.
    params_out : list[str], optional
        List of output parameter names to extract from simulation results.
        If empty or omitted (default), all available outputs will be extracted.
    save_results_detailed : bool, optional
        Whether to save detailed simulation objects as pickle files
        (``sim_<index>.plk``). Only takes effect when dir_output is given.
        Default is False to save disk space.
    dir_output : Path, str, or None, optional
        Directory path to save detailed simulation results.
        Created automatically if it doesn't exist. Default is None.
    path_results : Path, str, or None, optional
        File path for saving summary results CSV after each simulation.
        Enables incremental saving for long-running studies. Default is None.
    include_gitignore : bool, optional
        Whether to create a .gitignore file in dir_output that ignores the
        pickle files (``*.plk``, ``*.pkl``, ``*.pickle``). An existing
        .gitignore is left untouched.
        Default is False. Only applies when dir_output is specified.
    isolation_mode : str, optional
        Simulation isolation mode. Options:

        - 'reuse' (default): Reuses the same base_case instance across
          simulations.
        - 'deepcopy': Each simulation uses a deep copy of base_case.
          Guarantees complete isolation but slower performance. Use if
          run_simulation() has side effects or modifies internal state.

        Any other value raises ValueError.
    verbose : bool, optional
        Whether to print progress information during analysis.
        Default is True.
    
    Attributes
    ----------
    base_case : Simulation or Plant
        The base simulation template object.
    params_in : dict[str, Array or Iterable]
        Input parameters dictionary as provided during initialization.
    params_out : list[str]
        Output parameters to extract from simulation results.
    save_results_detailed : bool
        Flag indicating whether to save detailed simulation objects.
    include_gitignore : bool
        Flag indicating whether a .gitignore is written into dir_output.
    isolation_mode : str
        Either 'reuse' or 'deepcopy'.
    dir_output : Path or None
        Directory for saving detailed simulation results.
    path_results : Path or None
        File path for saving incremental CSV results.
    verbose : bool
        Flag for progress information printing.
    cases : Frame or None
        Frame containing all input parameter combinations to analyze.
        Created by setup_cases() method with units preserved.
    results : Frame or None
        Complete results Frame with input parameters and output metrics.
        Available after successful run_analysis() execution with units.
        
    Examples
    --------
    Basic parametric study with units preserved:
    
    >>> from antupy.analyser.par import Parametric
    >>> from antupy.array import Array
    >>> 
    >>> # Define parameter ranges with units
    >>> params_in = {
    ...     'temperature': Array([20, 25, 30], '°C'),
    ...     'flow_rate': Array([0.1, 0.2, 0.3], 'm3/s')
    ... }
    >>> 
    >>> # Create and run analysis
    >>> study = Parametric(
    ...     base_case=my_simulation,
    ...     params_in=params_in,
    ...     params_out=['efficiency', 'cost']
    ... )
    >>> results = study.run_analysis()  # Returns ap.Frame with units
    >>> 
    >>> # Access results with units
    >>> efficiency_array = results.get_values('efficiency')  # Array with units
    >>> all_outputs = results.get_values(['efficiency', 'cost'])  # Dict of Arrays
    """
    

    # [docs]
    def __init__(
        self,
        base_case: SimulationType,
        params_in: Mapping[str, ParameterValue],
        params_out: list[str] = [],
        save_results_detailed: bool = False,
        dir_output: Path | str | None = None,
        path_results: Path | str | None = None,
        include_gitignore: bool = False,
        isolation_mode: str = 'reuse',
        verbose: bool = True
    ):
        self.base_case = base_case
        self.params_in = params_in
        self.params_out = params_out
        self.save_results_detailed = save_results_detailed
        self.include_gitignore = include_gitignore
        self.isolation_mode = isolation_mode
        self.verbose = verbose
        
        # Validate isolation_mode
        if isolation_mode not in ['deepcopy', 'reuse']:
            raise ValueError(f"isolation_mode must be 'deepcopy' or 'reuse', got '{isolation_mode}'")
                
        # Convert paths to Path objects
        self.dir_output = Path(dir_output) if dir_output is not None else None
        self.path_results = Path(path_results) if path_results is not None else None
        
        # Internal state - using Frame instead of DataFrame
        self.cases: Frame | None = None
        self.results: Frame | None = None


    def setup_cases(
        self, 
        params_in: Mapping[str, ParameterValue]
    ) -> Frame:
        """
        Build the full-factorial run matrix for the given parameters.

        Every combination of the supplied values is generated with
        ``itertools.product``, so the first parameter varies slowest and the
        last parameter varies fastest. The resulting Frame is also stored
        in ``self.cases``.

        Parameters
        ----------
        params_in : dict[str, Array or Iterable]
            Mapping from parameter name to its values. An Array contributes
            its values together with its unit; any other iterable is
            materialised with ``list()`` and gets the empty unit ``""``.

        Returns
        -------
        cases : Frame
            One row per combination, one column per key of params_in.
            An empty mapping produces an empty ``Frame()``.

        Examples
        --------
        >>> params = {
        ...     'temp': Array([20, 30], 'K'),
        ...     'size': ['small', 'large']
        ... }
        >>> cases = study.setup_cases(params)
        >>> print(cases.units)
        {'temp': 'K', 'size': ''}
        """
        column_names = list(params_in.keys())

        # Nothing to combine: hand back an empty Frame
        if len(column_names) == 0:
            self.cases = Frame()
            return self.cases

        value_lists = []
        unit_list = []
        for series in params_in.values():
            if not isinstance(series, Array):
                # Plain iterable (list, tuple, ...): no unit information
                value_lists.append(list(series))
                unit_list.append("")
                continue
            # Array: split into raw values and unit
            value_lists.append(series.value.tolist())
            unit_list.append(series.u)

        # Cartesian product of all value lists, first parameter outermost
        combinations = list(itertools.product(*value_lists))
        self.cases = Frame(combinations, columns=column_names, units=unit_list)
        return self.cases

    def _extract_outputs(
        self, 
        sim: SimulationType, 
        params_out: list[str]
    ) -> tuple[list[float], list[str]]:
        """
        Extract output values and units from simulation object.
        
        Supports extraction from Var, Array, and plain numeric types.
        For Array objects with multiple values, returns the mean.
        
        Parameters
        ----------
        sim : Simulation or Plant
            Simulation object with .out dictionary containing results.
        params_out : list[str]
            List of parameter names to extract from sim.out.
            
        Returns
        -------
        tuple[list[float], list[str]]
            Values as list of floats and corresponding units as list of strings.
            
        Examples
        --------
        Extract from simulation with mixed output types:
        
        >>> sim.out = {
        ...     'efficiency': Var(0.85, '-'),
        ...     'power': Array([1000, 1100], 'W'),
        ...     'count': 42
        ... }
        >>> values, units = self._extract_outputs(sim, ['efficiency', 'power', 'count'])
        >>> # Returns: ([0.85, 1050.0, 42.0], ['-', 'W', ''])
        """
        values = []
        units = []
        
        for param in params_out:
            if param not in sim.out:
                raise KeyError(f"Parameter '{param}' not found in sim.out")
            
            output = sim.out[param]
            
            if isinstance(output, Var):
                # Extract value and unit from Var
                values.append(float(output.v))
                units.append(output.u)
            elif isinstance(output, Array):
                # Extract value and unit from Array (use mean for multiple values)
                if len(output.value) == 1:
                    values.append(float(output.value[0]))
                else:
                    values.append(float(np.mean(output.value)))
                units.append(output.u)
            elif isinstance(output, str):
                # Handle string values (like status)
                values.append(output)
                units.append("")  # No unit for strings
            else:
                # Handle plain numbers (int, float)
                try:
                    values.append(float(output))
                except (ValueError, TypeError):
                    # If can't convert to float, store as string
                    values.append(str(output))
                units.append("")  # No unit for plain numbers
        
        return values, units

    def run_analysis(self) -> Frame:
        """
        Execute parametric analysis with configured input parameters.

        Creates all parameter combinations using setup_cases() and runs
        simulations for each, collecting specified output metrics.
        Results are returned as ap.Frame with full unit tracking.

        For every case the method updates the simulation parameters, calls
        ``run_simulation(verbose=self.verbose)``, extracts the outputs and,
        if configured, pickles the simulation object to
        ``dir_output/sim_<index>.plk`` and rewrites the summary CSV at
        ``path_results``. In 'reuse' mode the parameters are written onto
        ``self.base_case`` itself; in 'deepcopy' mode they are written onto
        a fresh deep copy for each case.

        When ``params_out`` is empty, the output columns are taken from the
        keys of ``sim.out`` after the first simulation.

        Returns
        -------
        Frame
            Complete results Frame with input parameters and output metrics.
            Input columns contain parameter values with units from params_in.
            Output columns contain extracted metrics with units from sim.out.
            Each row represents one completed simulation case. If there are
            no cases, the Frame contains only the (empty) input columns.
            The same Frame is stored in ``self.results``.

        Raises
        ------
        KeyError
            If any parameter in params_out is not found in simulation outputs.

        Notes
        -----
        Exceptions raised while updating parameters or running a simulation
        are not caught here; they propagate to the caller and abort the study.

        Examples
        --------
        Run analysis and access results with units:

        >>> study = Parametric(
        ...     base_case=simulation,
        ...     params_in={'temp': Array([20, 30], 'K')},
        ...     params_out=['efficiency', 'power_output']
        ... )
        >>> results = study.run_analysis()
        >>>
        >>> # Get values with units preserved
        >>> efficiency_array = results.get_values('efficiency')
        >>> all_results = results.get_values()
        >>>
        >>> # Access units
        >>> print(results.units)
        >>> print(results.unit('efficiency'))
        """
        # Setup cases from input parameters
        cases_in = self.setup_cases(self.params_in)
        n_cases = len(cases_in)

        # Input block shared by every results Frame built below
        results_data = cases_in.values.copy()
        results_columns = list(cases_in.columns)
        input_units = list(cases_in.units.values())

        def _allocate_results(output_names):
            """Build the results Frame: input columns plus NaN placeholders for outputs."""
            data = results_data
            if output_names:
                placeholders = np.full((n_cases, len(output_names)), np.nan)
                data = np.column_stack([results_data, placeholders])
            # Output units are unknown at this point; they are filled in at the end
            return Frame(
                data,
                columns=results_columns + list(output_names),
                units=input_units + [""] * len(output_names)
            )

        # Unit of each output, recorded the first time the output is seen
        output_units = {}

        # With explicit params_out the layout is known up front. Otherwise
        # `results` stays None until the first simulation reveals the outputs,
        # which also lets the empty-study fallback below trigger correctly.
        results = _allocate_results(self.params_out) if self.params_out else None

        # Create output directory if needed
        if self.dir_output is not None:
            self.dir_output.mkdir(parents=True, exist_ok=True)

            # Create .gitignore file if requested (never overwrite an existing one)
            if self.include_gitignore:
                gitignore_path = self.dir_output / '.gitignore'
                if not gitignore_path.exists():
                    gitignore_content = (
                        "# Ignore pickle files (simulation results)\n"
                        "*.plk\n"
                        "*.pkl\n"
                        "*.pickle\n"
                    )
                    with open(gitignore_path, 'w', encoding='utf-8') as f:
                        f.write(gitignore_content)

        # Run simulations
        for index in range(n_cases):
            if self.verbose:
                print(f'RUNNING SIMULATION {index + 1}/{n_cases}')

            # Get input parameters for this case
            row = cases_in.iloc[index]
            case_params = {col: row[col] for col in cases_in.columns}

            # Pick the object to run: an isolated copy or the shared base case
            if self.isolation_mode == 'deepcopy':
                sim = copy.deepcopy(self.base_case)
            else:  # 'reuse' mode
                sim = self.base_case

            self._update_parameters(sim, case_params, cases_in.units)

            # Run simulation
            sim.run_simulation(verbose=self.verbose)

            if self.params_out:
                params_out = self.params_out
                missing = [k for k in params_out if k not in sim.out]
                if missing:
                    raise KeyError(f"The following params_out are not in sim.out: {missing}")
            else:
                # Auto-detection: the first run fixes the column layout
                params_out = list(sim.out.keys())
                if results is None:
                    results = _allocate_results(params_out)

            # Extract outputs with units
            values_out, units_out = self._extract_outputs(sim, params_out)

            # Store values in results
            for param, value in zip(params_out, values_out):
                results.loc[index, param] = value

            # Keep the unit reported the first time each output appears
            for param, unit in zip(params_out, units_out):
                output_units.setdefault(param, unit)

            # Save detailed results if requested
            if self.save_results_detailed and self.dir_output:
                pickle_path = self.dir_output / f'sim_{index}.plk'
                with open(pickle_path, "wb") as file:
                    pickle.dump(sim, file, protocol=pickle.HIGHEST_PROTOCOL)

            # Save intermediate results to CSV if requested
            if self.path_results is not None:
                results.to_csv(self.path_results)

            if self.verbose:
                print(f"Case {index + 1} completed: {dict(zip(params_out, values_out))}")

        # No simulation ran and no explicit outputs were requested:
        # return the inputs only
        if results is None:
            results = _allocate_results([])

        # Update Frame with complete unit information
        all_units = {}
        for col in results.columns:
            if col in cases_in.columns:
                all_units[col] = cases_in.unit(col)[col]
            elif col in output_units:
                all_units[col] = output_units[col]
            else:
                all_units[col] = ""

        # Create final Frame with correct units
        final_results = Frame(
            results.values,
            columns=results.columns,
            units=[all_units[col] for col in results.columns]
        )

        self.results = final_results
        return final_results

    def _update_parameters(
        self,
        simulation: SimulationType,
        case_params: dict[str, Any],
        input_units: dict[str, str]
    ) -> None:
        """
        Update simulation object parameters with values from a case.
        
        Supports both direct attributes and nested attributes using dot notation.
        Creates Var objects for parameters with units. For SmartPlant instances,
        uses intelligent component invalidation instead of full recreation.
        
        Parameters
        ----------
        simulation : Simulation or Plant
            Simulation object to update.
        case_params : dict[str, Any]
            Dictionary mapping parameter names to values for this case.
        input_units : dict[str, str]
            Dictionary mapping parameter names to their units.
            
        Examples
        --------
        Update simulation parameters:
        
        >>> case_params = {'temperature': 25.0, 'subsystem.pressure': 2.0}
        >>> input_units = {'temperature': '°C', 'subsystem.pressure': 'bar'}
        >>> self._update_parameters(sim, case_params, input_units)
        >>> # sim.temperature becomes Var(25.0, '°C')
        >>> # sim.subsystem.pressure becomes Var(2.0, 'bar')
        """
        changed_params = set()
        
        # Track which parameters actually changed for smart component invalidation
        for param_name, value in case_params.items():
            unit = input_units.get(param_name, "")
            
            # Get old value for change detection
            if '.' in param_name:
                # Handle nested attribute
                parts = param_name.split('.')
                obj = simulation
                # Navigate to the parent object
                for part in parts[:-1]:
                    if not hasattr(obj, part):
                        obj = None
                        break
                    obj = getattr(obj, part)
                
                old_value = getattr(obj, parts[-1], None) if obj else None
            else:
                # Handle direct attribute
                old_value = getattr(simulation, param_name, None)
            
            # Set the new value
            if '.' in param_name:
                # Handle nested attribute with dot notation
                parts = param_name.split('.')
                obj = simulation
                
                # Navigate to the parent object
                for part in parts[:-1]:
                    if not hasattr(obj, part):
                        # Create intermediate object if it doesn't exist
                        setattr(obj, part, type('', (), {})())
                    obj = getattr(obj, part)
                
                # Set the final attribute
                final_attr = parts[-1]
                if unit:  # Create Var object if unit is specified
                    new_value = Var(value, unit)
                    setattr(obj, final_attr, new_value)
                else:  # Set plain value if no unit
                    setattr(obj, final_attr, value)
                    new_value = value
            else:
                # Handle direct attribute
                if unit:  # Create Var object if unit is specified
                    new_value = Var(value, unit)
                    setattr(simulation, param_name, new_value)
                else:  # Set plain value if no unit
                    setattr(simulation, param_name, value)
                    new_value = value
            
            # Check if value actually changed for smart component tracking
            if self._param_values_different(old_value, new_value):
                changed_params.add(param_name)
        
        # Use smart component invalidation if available
        if hasattr(simulation, '_component_cache') and hasattr(simulation, '_param_hash_cache'):
            if changed_params:
                # For Plant, clear component cache to force recreation
                if isinstance(simulation, Plant):
                    simulation._component_cache.clear()
                if isinstance(simulation, Plant):
                    simulation._param_hash_cache.clear()
                
                # Still call __post_init__ for any plant-level derived parameter calculations
                if hasattr(simulation, '__post_init__'):
                    simulation.__post_init__()
            # If no parameters changed, skip both invalidation and __post_init__
        elif hasattr(simulation, '__post_init__'):
            # Fallback for non-smart plants
            simulation.__post_init__()

    def _param_values_different(self, old_value, new_value) -> bool:
        """Check if parameter values are actually different."""
        # Handle Var objects
        if hasattr(old_value, 'gv') and hasattr(new_value, 'gv'):
            return (old_value.gv() != new_value.gv() or 
                    old_value.unit != new_value.unit)
        
        # Handle case where one is Var and other is not
        if hasattr(old_value, 'gv') and not hasattr(new_value, 'gv'):
            return True
        if not hasattr(old_value, 'gv') and hasattr(new_value, 'gv'):
            return True
        
        # Handle regular values
        try:
            return old_value != new_value
        except:
            # Fallback for complex objects
            return str(old_value) != str(new_value)

    @overload
    def get_output_arrays(self, cols: str) -> Array: ...
    
    @overload
    def get_output_arrays(self, cols: list[str]) -> dict[str, Array]: ...
    
    @overload
    def get_output_arrays(self, cols: None = None) -> dict[str, Array]: ...
    
    def get_output_arrays(self, cols: str | list[str] | None = None) -> dict[str, Array] | Array:
        """
        Get analysis results as Array objects with units preserved.
        
        Parameters
        ----------
        cols : str, list[str], or None
            Column name(s) to return as Arrays. If None, returns all columns.
            
        Returns
        -------
        dict[str, Array] or Array
            If cols is str: single Array object.
            If cols is list or None: dict mapping column names to Array objects.
            
        Raises
        ------
        ValueError
            If no results are available (run_analysis() not called yet).
        """
        if self.results is None:
            raise ValueError("No results available. Run run_analysis() first.")
        
        return self.results.get_values(cols)

    def get_summary(self) -> dict[str, Any]:
        """
        Generate summary of parametric analysis results.
        
        Returns
        -------
        dict[str, Any]
            Summary statistics including case counts, parameter lists,
            completion status, units information, and statistical summaries
            for each output parameter.
            
        Examples
        --------
        Get analysis summary:
        
        >>> summary = study.get_summary()
        >>> print(f"Completed {summary['total_cases']} cases")
        >>> print(f"Input units: {summary['input_units']}")
        >>> print(f"Efficiency stats: {summary['efficiency_stats']}")
        """
        if self.results is None:
            return {"status": "No analysis completed"}
        
        if self.cases is None or len(self.cases) == 0:
            return {"status": "No cases defined"}
        
        # Basic information
        summary = {
            "total_cases": len(self.results),
            "input_parameters": [col for col in self.results.columns if col in self.cases.columns],
            "output_parameters": [col for col in self.results.columns if col not in self.cases.columns],
            "completed": True
        }
        
        # Units information
        summary["input_units"] = {}
        summary["output_units"] = {}
        
        for col in summary["input_parameters"]:
            summary["input_units"][col] = self.results.unit(col)[col]
        
        for col in summary["output_parameters"]:
            summary["output_units"][col] = self.results.unit(col)[col]
        
        # Statistical summaries for output parameters
        for col in summary["output_parameters"]:
            data = self.results[col].dropna()  # Remove NaN values
            if len(data) > 0:
                unit = self.results.unit(col)[col]
                summary[f"{col}_stats"] = {
                    "mean": float(data.mean()),
                    "std": float(data.std()),
                    "min": float(data.min()),
                    "max": float(data.max()),
                    "unit": unit
                }
        
        return summary



def main():
    """Placeholder entry point; it performs no work."""
    return None


# Run the placeholder only when the module is executed as a script
if __name__ == "__main__":
    main()