# batchmind-os/batchmind_os/layer1_features/physics_features.py
# 23f3002638
# Initial commit with LFS tracking
# 038ee19
"""
Layer 1B: Physics Features
- Extract 10 physics-inspired features per batch.
- Domain knowledge extraction from signals.
"""
import pandas as pd
import numpy as np
import pickle
import os
import sys
# Add parent directory to path for config import
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import CFG
def extract_physics_single(batch_df: pd.DataFrame) -> pd.Series:
    """
    Extracts 10 physics-inspired features for a single batch.

    Every column is optional. A missing signal column is treated as a single
    zero-valued sample and a missing "Phase" column as "no rows in any phase",
    so all 10 features are always returned (0.0 where nothing can be computed)
    instead of raising KeyError part-way through.

    Args:
        batch_df (pd.DataFrame): Cleaned batch data. Columns read:
            "Vibration_mm_s", "Power_Consumption_kW", "Motor_Speed_RPM",
            "Temperature_C" and "Phase".

    Returns:
        pd.Series: 10 physics features keyed by feature name, in the order
        rms_vibration, total_energy_kwh, power_ramp_rate, power_peak_to_mean,
        vibration_entropy, motor_utilization, compression_energy_kwh,
        compression_vib_rms, drying_efficiency, granulation_power_stability.
    """

    def signal(frame: pd.DataFrame, column: str) -> pd.Series:
        # One fallback policy for every lookup: an absent column becomes a
        # single zero sample, so each statistic below degrades to 0.0.
        if column in frame.columns:
            return frame[column]
        return pd.Series(np.zeros(1))

    def phase_rows(phase: str) -> pd.DataFrame:
        # Without a "Phase" column no row can be attributed to any phase.
        if "Phase" not in batch_df.columns:
            return batch_df.iloc[0:0]
        return batch_df[batch_df["Phase"] == phase]

    def rms(values) -> float:
        # Root-mean-square; defined as 0.0 for an empty signal.
        if len(values) == 0:
            return 0.0
        return float(np.sqrt(np.mean(np.square(values))))

    feats = {}

    # Pre-process signals
    vib = signal(batch_df, "Vibration_mm_s").values
    power = signal(batch_df, "Power_Consumption_kW").values
    motor = signal(batch_df, "Motor_Speed_RPM").values

    # 1. rms_vibration
    feats["rms_vibration"] = rms(vib)

    # 2. total_energy_kwh
    # NOTE(review): summing kW and dividing by 60 yields kWh only if rows are
    # one minute apart - confirm the sampling interval of the input data.
    feats["total_energy_kwh"] = float(np.sum(power) / 60.0)

    # 3. power_ramp_rate: mean absolute sample-to-sample change in power
    if len(power) > 1:
        feats["power_ramp_rate"] = float(np.mean(np.abs(np.diff(power))))
    else:
        feats["power_ramp_rate"] = 0.0

    # 4. power_peak_to_mean
    # Guard the empty case explicitly so np.mean does not emit a
    # "Mean of empty slice" RuntimeWarning for zero-row batches.
    p_mean = np.mean(power) if len(power) > 0 else 0.0
    if p_mean > 0:
        feats["power_peak_to_mean"] = float(np.max(power) / p_mean)
    else:
        feats["power_peak_to_mean"] = 0.0

    # 5. vibration_entropy: Shannon entropy (natural log) of a 10-bin histogram
    if len(vib) > 1 and np.std(vib) > 0:
        hist, _ = np.histogram(vib, bins=10, density=True)
        # Normalize to probability; the epsilons avoid 0/0 and log(0)
        p = hist / (np.sum(hist) + 1e-10)
        feats["vibration_entropy"] = float(-np.sum(p * np.log(p + 1e-10)))
    else:
        feats["vibration_entropy"] = 0.0

    # 6. motor_utilization: fraction of samples with positive motor speed
    if len(motor) > 0:
        feats["motor_utilization"] = float(np.mean(motor > 0))
    else:
        feats["motor_utilization"] = 0.0

    # Phase-specific features
    # 7. compression_energy_kwh / 8. compression_vib_rms
    comp_df = phase_rows("Compression")
    if not comp_df.empty:
        feats["compression_energy_kwh"] = float(
            np.sum(signal(comp_df, "Power_Consumption_kW")) / 60.0
        )
        feats["compression_vib_rms"] = rms(signal(comp_df, "Vibration_mm_s").values)
    else:
        feats["compression_energy_kwh"] = 0.0
        feats["compression_vib_rms"] = 0.0

    # 9. drying_efficiency: mean temperature per unit of mean power while drying
    feats["drying_efficiency"] = 0.0
    dry_df = phase_rows("Drying")
    if not dry_df.empty:
        d_p_mean = np.mean(signal(dry_df, "Power_Consumption_kW"))
        if d_p_mean > 0:
            feats["drying_efficiency"] = float(
                np.mean(signal(dry_df, "Temperature_C")) / d_p_mean
            )

    # 10. granulation_power_stability: std of power during granulation
    gran_df = phase_rows("Granulation")
    if not gran_df.empty:
        feats["granulation_power_stability"] = float(
            np.std(signal(gran_df, "Power_Consumption_kW"))
        )
    else:
        feats["granulation_power_stability"] = 0.0

    return pd.Series(feats)
def main():
    """
    Runs Layer 1B end to end.

    Loads the cleaned process data from ``CFG.PROC_DIR/process_clean.pkl``,
    computes the physics features of every distinct ``Batch_ID`` with
    ``extract_physics_single``, stacks them into one DataFrame (one row per
    batch, indexed by ``Batch_ID``) and pickles it to
    ``CFG.PROC_DIR/physics_features.pkl``. Progress and a short summary are
    printed to stdout.
    """
    print(">>> Starting Layer 1B: Physics Features")

    # NOTE: pickle.load executes arbitrary code from the file; only point this
    # at artefacts produced by the pipeline itself.
    source_path = os.path.join(CFG.PROC_DIR, "process_clean.pkl")
    with open(source_path, "rb") as handle:
        process_df = pickle.load(handle)

    unique_ids = process_df["Batch_ID"].unique()
    print(f"Extracting physics features for {len(unique_ids)} batches...")

    # One feature Series per batch, named after its batch id so that the
    # names become the row labels after the transpose below.
    per_batch = []
    for batch_id in unique_ids:
        rows_of_batch = process_df[process_df["Batch_ID"] == batch_id]
        features = extract_physics_single(rows_of_batch)
        features.name = batch_id
        per_batch.append(features)

    # Series are concatenated as columns, then transposed to batches-as-rows.
    feat_df = pd.concat(per_batch, axis=1).T
    feat_df.index.name = "Batch_ID"

    # Save output
    output_path = os.path.join(CFG.PROC_DIR, "physics_features.pkl")
    with open(output_path, "wb") as handle:
        pickle.dump(feat_df, handle)

    # Summary report
    banner = "=" * 60
    nan_total = feat_df.isna().sum().sum()
    print(f"Layer 1B complete. Shape: {feat_df.shape}")
    print(banner)
    print("LAYER 1B COMPLETE")
    print(f" Output shape: {feat_df.shape}")
    print(f" NaN count: {nan_total}")
    print(f" Output file: {output_path}")
    print(banner)
# Script entry point: run the Layer 1B pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()