| |
|
| |
|
| | import pandas as pd
|
| | from core.estimation.descriptive import compute_descriptive_statistics
|
| |
|
| |
|
| | def run_descriptive_statistics(
|
| | *,
|
| | df: pd.DataFrame,
|
| | column: str,
|
| | quantile_probs: list[float],
|
| | trim_alpha: float | None,
|
| | winsor_limits: tuple[float, float] | None,
|
| | weights_col: str | None,
|
| | round_digits: int,
|
| | ) -> pd.DataFrame:
|
| |
|
| | if df is None:
|
| | raise ValueError("No dataset loaded.")
|
| |
|
| | if column not in df.columns:
|
| | raise ValueError(f"Column '{column}' not found.")
|
| |
|
| | series = df[column].dropna()
|
| |
|
| | if series.empty:
|
| | raise ValueError("Selected column has no valid data.")
|
| |
|
| | if not pd.api.types.is_numeric_dtype(series):
|
| | raise ValueError("Selected column must be numeric.")
|
| |
|
| | weights = None
|
| |
|
| | if weights_col:
|
| | if weights_col not in df.columns:
|
| | raise ValueError(f"Weights column '{weights_col}' not found.")
|
| |
|
| | weights = df.loc[series.index, weights_col]
|
| |
|
| | if not pd.api.types.is_numeric_dtype(weights):
|
| | raise ValueError("Weights must be numeric.")
|
| |
|
| | if (weights < 0).any():
|
| | raise ValueError("Weights must be non-negative.")
|
| |
|
| | stats_df = compute_descriptive_statistics(
|
| | data=series.values,
|
| | quantile_probs=quantile_probs,
|
| | trim_alpha=trim_alpha,
|
| | winsor_limits=winsor_limits,
|
| | weights=weights.values if weights is not None else None,
|
| | )
|
| |
|
| |
|
| |
|
| |
|
| | stats_df[["Value", "Bias Corrected"]] = stats_df[["Value", "Bias Corrected"]].round(round_digits)
|
| |
|
| | return stats_df |