""" Layer 1B: Physics Features - Extract 10 physics-inspired features per batch. - Domain knowledge extraction from signals. """ import pandas as pd import numpy as np import pickle import os import sys # Add parent directory to path for config import sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from config import CFG def extract_physics_single(batch_df: pd.DataFrame) -> pd.Series: """ Extracts 10 physics-inspired features for a single batch. Args: batch_df (pd.DataFrame): Cleaned batch data Returns: pd.Series: 10 physics features """ feats = {} # Pre-process signals vib = batch_df["Vibration_mm_s"].values if "Vibration_mm_s" in batch_df.columns else np.zeros(1) power = batch_df["Power_Consumption_kW"].values if "Power_Consumption_kW" in batch_df.columns else np.zeros(1) motor = batch_df["Motor_Speed_RPM"].values if "Motor_Speed_RPM" in batch_df.columns else np.zeros(1) temp = batch_df["Temperature_C"].values if "Temperature_C" in batch_df.columns else np.zeros(1) # 1. rms_vibration feats["rms_vibration"] = float(np.sqrt(np.mean(np.square(vib)))) if len(vib) > 0 else 0.0 # 2. total_energy_kwh feats["total_energy_kwh"] = float(np.sum(power) / 60.0) # 3. power_ramp_rate if len(power) > 1: feats["power_ramp_rate"] = float(np.mean(np.abs(np.diff(power)))) else: feats["power_ramp_rate"] = 0.0 # 4. power_peak_to_mean p_mean = np.mean(power) if p_mean > 0: feats["power_peak_to_mean"] = float(np.max(power) / p_mean) else: feats["power_peak_to_mean"] = 0.0 # 5. vibration_entropy if len(vib) > 1 and np.std(vib) > 0: hist, _ = np.histogram(vib, bins=10, density=True) # Normalize to probability p = hist / (np.sum(hist) + 1e-10) feats["vibration_entropy"] = float(-np.sum(p * np.log(p + 1e-10))) else: feats["vibration_entropy"] = 0.0 # 6. motor_utilization if len(motor) > 0: feats["motor_utilization"] = float(np.mean(motor > 0)) else: feats["motor_utilization"] = 0.0 # Phase-specific features # 7. compression_energy_kwh comp_df = batch_df[batch_df["Phase"] == "Compression"] if not comp_df.empty: feats["compression_energy_kwh"] = float(np.sum(comp_df["Power_Consumption_kW"]) / 60.0) # 8. compression_vib_rms cvib = comp_df["Vibration_mm_s"].values feats["compression_vib_rms"] = float(np.sqrt(np.mean(np.square(cvib)))) if len(cvib) > 0 else 0.0 else: feats["compression_energy_kwh"] = 0.0 feats["compression_vib_rms"] = 0.0 # 9. drying_efficiency dry_df = batch_df[batch_df["Phase"] == "Drying"] if not dry_df.empty: d_p_mean = np.mean(dry_df["Power_Consumption_kW"]) if d_p_mean > 0: feats["drying_efficiency"] = float(np.mean(dry_df["Temperature_C"]) / d_p_mean) else: feats["drying_efficiency"] = 0.0 else: feats["drying_efficiency"] = 0.0 # 10. granulation_power_stability gran_df = batch_df[batch_df["Phase"] == "Granulation"] if not gran_df.empty: feats["granulation_power_stability"] = float(np.std(gran_df["Power_Consumption_kW"])) else: feats["granulation_power_stability"] = 0.0 return pd.Series(feats) def main(): print(">>> Starting Layer 1B: Physics Features") with open(os.path.join(CFG.PROC_DIR, "process_clean.pkl"), "rb") as f: df = pickle.load(f) batch_ids = df["Batch_ID"].unique() all_physics = [] print(f"Extracting physics features for {len(batch_ids)} batches...") for bid in batch_ids: batch_df = df[df["Batch_ID"] == bid] phys_s = extract_physics_single(batch_df) phys_s.name = bid all_physics.append(phys_s) feat_df = pd.concat(all_physics, axis=1).T feat_df.index.name = "Batch_ID" # Save output output_path = os.path.join(CFG.PROC_DIR, "physics_features.pkl") with open(output_path, "wb") as f: pickle.dump(feat_df, f) print(f"Layer 1B complete. Shape: {feat_df.shape}") print("="*60) print(f"LAYER 1B COMPLETE") print(f" Output shape: {feat_df.shape}") print(f" NaN count: {feat_df.isna().sum().sum()}") print(f" Output file: {output_path}") print("="*60) if __name__ == "__main__": main()