"""
Layer 1B: Physics Features
- Extract 10 physics-inspired features per batch.
- Domain knowledge extraction from signals.
"""

import pandas as pd
import numpy as np
import pickle
import os
import sys

# Add parent directory to path for config import
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import CFG

def _rms(values: np.ndarray) -> float:
    """Return the root-mean-square of *values*, or 0.0 for an empty array."""
    if len(values) == 0:
        return 0.0
    return float(np.sqrt(np.mean(np.square(values))))


def _signal_or_zero(batch_df: pd.DataFrame, column: str) -> np.ndarray:
    """Return *column* as an array, or a one-element zero array if it is absent."""
    if column in batch_df.columns:
        return batch_df[column].values
    return np.zeros(1)


def _phase_rows(batch_df: pd.DataFrame, phase: str) -> pd.DataFrame:
    """Return the rows whose ``Phase`` equals *phase* (empty if no ``Phase`` column)."""
    if "Phase" not in batch_df.columns:
        return batch_df.iloc[0:0]
    return batch_df[batch_df["Phase"] == phase]


def extract_physics_single(batch_df: pd.DataFrame) -> pd.Series:
    """
    Extracts 10 physics-inspired features for a single batch.

    Every feature falls back to 0.0 when the column(s) it needs are missing
    or when there are no rows to compute it from, so the returned Series
    always carries the same 10 keys in the same order.

    Args:
        batch_df (pd.DataFrame): Cleaned batch data. The columns read are
            "Vibration_mm_s", "Power_Consumption_kW", "Motor_Speed_RPM",
            "Temperature_C" and "Phase"; any of them may be absent.

    Returns:
        pd.Series: 10 physics features, indexed by feature name:
            rms_vibration, total_energy_kwh, power_ramp_rate,
            power_peak_to_mean, vibration_entropy, motor_utilization,
            compression_energy_kwh, compression_vib_rms,
            drying_efficiency, granulation_power_stability.
    """
    vib_col = "Vibration_mm_s"
    power_col = "Power_Consumption_kW"
    temp_col = "Temperature_C"

    has_vib = vib_col in batch_df.columns
    has_power = power_col in batch_df.columns
    has_temp = temp_col in batch_df.columns

    feats = {}

    # Whole-batch signals; a missing column is treated as a single 0.0 sample.
    vib = _signal_or_zero(batch_df, vib_col)
    power = _signal_or_zero(batch_df, power_col)
    motor = _signal_or_zero(batch_df, "Motor_Speed_RPM")

    # 1. rms_vibration
    feats["rms_vibration"] = _rms(vib)

    # 2. total_energy_kwh
    # Dividing the summed kW by 60 presumably assumes one sample per minute
    # -- TODO confirm against the sampling rate of the cleaned data.
    feats["total_energy_kwh"] = float(np.sum(power) / 60.0)

    # 3. power_ramp_rate: mean absolute sample-to-sample change in power.
    if len(power) > 1:
        feats["power_ramp_rate"] = float(np.mean(np.abs(np.diff(power))))
    else:
        feats["power_ramp_rate"] = 0.0

    # 4. power_peak_to_mean
    # Skip np.mean on an empty array (it would warn and return NaN).
    p_mean = np.mean(power) if len(power) > 0 else 0.0
    if p_mean > 0:
        feats["power_peak_to_mean"] = float(np.max(power) / p_mean)
    else:
        feats["power_peak_to_mean"] = 0.0

    # 5. vibration_entropy: Shannon entropy (natural log) of a 10-bin histogram.
    if len(vib) > 1 and np.std(vib) > 0:
        hist, _ = np.histogram(vib, bins=10, density=True)
        # Normalize to probability; the epsilons avoid division by zero / log(0).
        p = hist / (np.sum(hist) + 1e-10)
        feats["vibration_entropy"] = float(-np.sum(p * np.log(p + 1e-10)))
    else:
        feats["vibration_entropy"] = 0.0

    # 6. motor_utilization: fraction of samples with a positive motor speed.
    if len(motor) > 0:
        feats["motor_utilization"] = float(np.mean(motor > 0))
    else:
        feats["motor_utilization"] = 0.0

    # Phase-specific features
    # 7. compression_energy_kwh
    comp_df = _phase_rows(batch_df, "Compression")
    if has_power and not comp_df.empty:
        feats["compression_energy_kwh"] = float(np.sum(comp_df[power_col]) / 60.0)
    else:
        feats["compression_energy_kwh"] = 0.0

    # 8. compression_vib_rms
    if has_vib and not comp_df.empty:
        feats["compression_vib_rms"] = _rms(comp_df[vib_col].values)
    else:
        feats["compression_vib_rms"] = 0.0

    # 9. drying_efficiency: mean temperature per unit of mean power while drying.
    feats["drying_efficiency"] = 0.0
    dry_df = _phase_rows(batch_df, "Drying")
    if has_power and has_temp and not dry_df.empty:
        d_p_mean = np.mean(dry_df[power_col])
        if d_p_mean > 0:
            feats["drying_efficiency"] = float(np.mean(dry_df[temp_col]) / d_p_mean)

    # 10. granulation_power_stability: std of power during granulation.
    gran_df = _phase_rows(batch_df, "Granulation")
    if has_power and not gran_df.empty:
        feats["granulation_power_stability"] = float(np.std(gran_df[power_col]))
    else:
        feats["granulation_power_stability"] = 0.0

    return pd.Series(feats)

def main():
    """
    Runs Layer 1B: builds the per-batch physics feature table and saves it.

    Reads "process_clean.pkl" from CFG.PROC_DIR, calls
    extract_physics_single once per distinct "Batch_ID", stacks the results
    into a DataFrame (one row per batch, index named "Batch_ID") and pickles
    it to "physics_features.pkl" in the same directory. Progress and a short
    summary are printed to stdout.

    Rows whose "Batch_ID" is missing (NaN) are skipped by the grouping.

    Raises:
        ValueError: If the cleaned data contains no batches.
    """
    print(">>> Starting Layer 1B: Physics Features")
    input_path = os.path.join(CFG.PROC_DIR, "process_clean.pkl")
    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # written by this pipeline, never data from an untrusted source.
    with open(input_path, "rb") as f:
        df = pickle.load(f)

    # One grouping pass instead of a full boolean-mask scan per batch.
    # sort=False keeps batches in order of first appearance.
    grouped = df.groupby("Batch_ID", sort=False)

    print(f"Extracting physics features for {grouped.ngroups} batches...")
    all_physics = []
    for bid, batch_df in grouped:
        phys_s = extract_physics_single(batch_df)
        phys_s.name = bid
        all_physics.append(phys_s)

    if not all_physics:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No batches found in {input_path}; nothing to extract.")

    # Each Series becomes a column; transpose so that each batch is a row.
    feat_df = pd.concat(all_physics, axis=1).T
    feat_df.index.name = "Batch_ID"

    # Save output
    output_path = os.path.join(CFG.PROC_DIR, "physics_features.pkl")
    with open(output_path, "wb") as f:
        pickle.dump(feat_df, f)

    print(f"Layer 1B complete. Shape: {feat_df.shape}")
    print("="*60)
    print("LAYER 1B COMPLETE")
    print(f"   Output shape: {feat_df.shape}")
    print(f"   NaN count:    {feat_df.isna().sum().sum()}")
    print(f"   Output file:  {output_path}")
    print("="*60)

# Run the Layer 1B pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()