Spaces:
Running
Running
| """ | |
| Layer 1B: Physics Features | |
| - Extract 10 physics-inspired features per batch. | |
| - Domain knowledge extraction from signals. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| import os | |
| import sys | |
| # Add parent directory to path for config import | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from config import CFG | |
def _signal_or_zero(batch_df, column):
    """Return *column* as an array, or a single 0.0 sample when it is absent."""
    if column in batch_df.columns:
        return batch_df[column].values
    return np.zeros(1)


def _rms(values):
    """Return the root-mean-square of *values*, or 0.0 for an empty input."""
    if len(values) == 0:
        return 0.0
    return float(np.sqrt(np.mean(np.square(values))))


def _phase_signal(batch_df, phase, column):
    """Return *column* restricted to the rows whose ``Phase`` equals *phase*.

    An empty Series is returned when either ``Phase`` or *column* is missing,
    so callers can treat "column absent" and "phase absent" the same way.
    """
    if "Phase" not in batch_df.columns or column not in batch_df.columns:
        return pd.Series(dtype=float)
    return batch_df.loc[batch_df["Phase"] == phase, column]


def extract_physics_single(
    batch_df: pd.DataFrame, *, samples_per_hour: float = 60.0
) -> pd.Series:
    """
    Extracts 10 physics-inspired features for a single batch.

    Missing columns never raise: a missing signal column is treated as a
    single zero sample, and a missing ``Phase`` column (or a phase with no
    rows) yields 0.0 for the corresponding phase-specific features.

    Args:
        batch_df (pd.DataFrame): Cleaned batch data. Columns read are
            ``Vibration_mm_s``, ``Power_Consumption_kW``, ``Motor_Speed_RPM``,
            ``Temperature_C`` and ``Phase``.
        samples_per_hour (float): Divisor applied to summed power to obtain
            the ``*_energy_kwh`` features. The default of 60 presumably
            reflects one sample per minute -- confirm against the logger.

    Returns:
        pd.Series: 10 physics features, in this order: rms_vibration,
        total_energy_kwh, power_ramp_rate, power_peak_to_mean,
        vibration_entropy, motor_utilization, compression_energy_kwh,
        compression_vib_rms, drying_efficiency, granulation_power_stability.
    """
    vib_col = "Vibration_mm_s"
    power_col = "Power_Consumption_kW"
    temp_col = "Temperature_C"

    vib = _signal_or_zero(batch_df, vib_col)
    power = _signal_or_zero(batch_df, power_col)
    motor = _signal_or_zero(batch_df, "Motor_Speed_RPM")

    feats = {}

    # 1. rms_vibration
    feats["rms_vibration"] = _rms(vib)

    # 2. total_energy_kwh
    feats["total_energy_kwh"] = float(np.sum(power) / samples_per_hour)

    # 3. power_ramp_rate: mean absolute sample-to-sample change in power
    if len(power) > 1:
        feats["power_ramp_rate"] = float(np.mean(np.abs(np.diff(power))))
    else:
        feats["power_ramp_rate"] = 0.0

    # 4. power_peak_to_mean (guard the empty case so np.mean never sees
    #    an empty array and emits a RuntimeWarning)
    peak_to_mean = 0.0
    if len(power) > 0:
        p_mean = np.mean(power)
        if p_mean > 0:
            peak_to_mean = float(np.max(power) / p_mean)
    feats["power_peak_to_mean"] = peak_to_mean

    # 5. vibration_entropy: Shannon entropy (natural log) of a 10-bin histogram
    if len(vib) > 1 and np.std(vib) > 0:
        hist, _ = np.histogram(vib, bins=10, density=True)
        # Normalize to probability; the epsilons avoid 0/0 and log(0)
        p = hist / (np.sum(hist) + 1e-10)
        feats["vibration_entropy"] = float(-np.sum(p * np.log(p + 1e-10)))
    else:
        feats["vibration_entropy"] = 0.0

    # 6. motor_utilization: fraction of samples with positive motor speed
    if len(motor) > 0:
        feats["motor_utilization"] = float(np.mean(motor > 0))
    else:
        feats["motor_utilization"] = 0.0

    # Phase-specific features
    # 7. compression_energy_kwh
    comp_power = _phase_signal(batch_df, "Compression", power_col)
    if comp_power.empty:
        feats["compression_energy_kwh"] = 0.0
    else:
        feats["compression_energy_kwh"] = float(
            np.sum(comp_power) / samples_per_hour
        )

    # 8. compression_vib_rms
    comp_vib = _phase_signal(batch_df, "Compression", vib_col)
    feats["compression_vib_rms"] = _rms(comp_vib.values)

    # 9. drying_efficiency: mean temperature per unit of mean power
    dry_power = _phase_signal(batch_df, "Drying", power_col)
    dry_temp = _phase_signal(batch_df, "Drying", temp_col)
    drying_efficiency = 0.0
    if not dry_power.empty and not dry_temp.empty:
        d_p_mean = np.mean(dry_power)
        if d_p_mean > 0:
            drying_efficiency = float(np.mean(dry_temp) / d_p_mean)
    feats["drying_efficiency"] = drying_efficiency

    # 10. granulation_power_stability: population std (np.std default ddof=0)
    gran_power = _phase_signal(batch_df, "Granulation", power_col)
    if gran_power.empty:
        feats["granulation_power_stability"] = 0.0
    else:
        feats["granulation_power_stability"] = float(np.std(gran_power))

    return pd.Series(feats)
def main():
    """Build the per-batch physics feature table and pickle it.

    Reads ``process_clean.pkl`` from ``CFG.PROC_DIR``, runs
    ``extract_physics_single`` once per distinct ``Batch_ID`` (in order of
    first appearance), stacks the results into a DataFrame indexed by
    ``Batch_ID`` and writes it to ``physics_features.pkl`` in the same
    directory. Progress and a short summary are printed to stdout.

    Raises:
        ValueError: If the input contains no batches.
    """
    print(">>> Starting Layer 1B: Physics Features")
    input_path = os.path.join(CFG.PROC_DIR, "process_clean.pkl")
    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # as long as the file is produced by a trusted upstream pipeline step.
    with open(input_path, "rb") as f:
        df = pickle.load(f)

    print(f"Extracting physics features for {df['Batch_ID'].nunique()} batches...")

    # One groupby pass instead of re-filtering the whole frame for every
    # batch; sort=False keeps batches in order of first appearance.
    all_physics = []
    for bid, batch_df in df.groupby("Batch_ID", sort=False, observed=True):
        phys_s = extract_physics_single(batch_df)
        phys_s.name = bid
        all_physics.append(phys_s)

    if not all_physics:
        raise ValueError(f"No batches found in {input_path}")

    # Each Series is one batch; concatenate as columns, then transpose so
    # that rows are batches and columns are features.
    feat_df = pd.concat(all_physics, axis=1).T
    feat_df.index.name = "Batch_ID"

    # Save output
    output_path = os.path.join(CFG.PROC_DIR, "physics_features.pkl")
    with open(output_path, "wb") as f:
        pickle.dump(feat_df, f)

    print(f"Layer 1B complete. Shape: {feat_df.shape}")
    print("="*60)
    print("LAYER 1B COMPLETE")
    print(f" Output shape: {feat_df.shape}")
    print(f" NaN count: {feat_df.isna().sum().sum()}")
    print(f" Output file: {output_path}")
    print("="*60)


# Run the feature extraction only when executed as a script, not on import.
if __name__ == "__main__":
    main()