"""Generate realistic synthetic PFAS-SBEAD reactor experimental data (100+ records)."""
|
|
| from __future__ import annotations |
|
|
| import numpy as np |
| import pandas as pd |
|
|
# Shared, module-level random generator with a fixed seed (42) so that a fresh
# run of this module produces the same data every time. The generator is
# stateful: every draw advances it, so successive calls that use it within one
# process yield different (but still deterministic) samples.
RNG = np.random.default_rng(42)
|
|
|
|
def generate_sbead_dataset(
    n: int = 120,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """
    Generate n experimental records for PFAS degradation in SBEAD reactor.

    Parameters follow ranges from the Final PFAS-SBEAD AI Pipeline document.
    Inputs are drawn uniformly from fixed ranges; response variables are
    simple weighted combinations of the inputs plus random noise, clipped to
    fixed bounds.

    Args:
        n: Number of records to generate. ``0`` yields an empty frame that
            still carries every column.
        rng: Random generator to draw from. When omitted, the module-level
            ``RNG`` is used, so existing callers see unchanged output. Pass a
            freshly seeded generator to get the same dataset on every call.

    Returns:
        A DataFrame with one row per experiment and 30 columns (identifier,
        operating conditions, electrical settings, PFAS fate, stability
        indicators, energy input, composite score and instability flag).

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    if rng is None:
        rng = RNG

    # NOTE: the order of the random draws below is part of the output
    # contract; reordering them changes the data produced for a given seed.

    # --- Reactor operating conditions -------------------------------------
    olr = rng.uniform(1.0, 6.0, n)
    hrt = rng.uniform(10.0, 30.0, n)
    ph = rng.uniform(6.5, 8.0, n)
    temperature = rng.uniform(30.0, 42.0, n)
    cod = rng.uniform(2000.0, 8000.0, n)
    scod = cod * rng.uniform(0.55, 0.85, n)  # soluble share: 55-85 % of COD
    vfa = rng.uniform(100.0, 1500.0, n)
    alkalinity = rng.uniform(1500.0, 5000.0, n)

    # --- Electrical settings ----------------------------------------------
    voltage = rng.uniform(0.2, 1.2, n)
    current = voltage * rng.uniform(0.5, 3.0, n)
    electrode_area = rng.uniform(0.5, 2.0, n)
    current_density = current / electrode_area
    conductivity = rng.uniform(1.0, 8.0, n)
    electrode_spacing = rng.uniform(1.0, 5.0, n)

    initial_pfas = rng.uniform(50.0, 500.0, n)

    # ndarray.max() raises on an empty array; with n == 0 the divisor is never
    # applied to any element, so any non-zero placeholder is fine.
    current_density_peak = current_density.max() if n else 1.0

    # --- PFAS degradation: weighted sum of inputs scaled to roughly 0..1 ---
    base_degradation = (
        0.12
        + 0.08 * (voltage / 1.2)
        + 0.06 * (current_density / current_density_peak)
        + 0.05 * ((ph - 6.5) / 1.5)
        + 0.04 * (hrt / 30.0)
        + 0.03 * (olr / 6.0)
        + 0.02 * ((temperature - 30.0) / 12.0)
    )
    noise = rng.normal(0, 0.04, n)
    pfas_degradation_pct = np.clip((base_degradation + noise) * 100, 5.0, 65.0)

    final_pfas = initial_pfas * (1 - pfas_degradation_pct / 100)
    pfas_adsorbed_sludge = initial_pfas * rng.uniform(0.02, 0.12, n)
    pfas_adsorbed_electrode = initial_pfas * rng.uniform(0.01, 0.08, n)

    # --- Fluoride release / defluorination --------------------------------
    fluoride_release = (
        pfas_degradation_pct * rng.uniform(0.3, 0.7, n)
        + voltage * rng.uniform(2.0, 8.0, n)
    )
    defluorination_pct = np.clip(
        fluoride_release / (initial_pfas * 0.15) * 100, 2.0, 55.0
    )

    # --- Short-chain by-product formation ---------------------------------
    short_chain_risk_base = (
        0.3
        - 0.15 * (pfas_degradation_pct / 65.0)
        + 0.1 * (vfa / 1500.0)
        - 0.05 * (voltage / 1.2)
    )
    short_chain_formation = np.clip(
        short_chain_risk_base + rng.normal(0, 0.05, n), 0.05, 0.6
    )

    # --- Process-stability indicators -------------------------------------
    ph_drop = np.clip(
        rng.uniform(0.1, 0.8, n) + 0.2 * (olr / 6.0) - 0.1 * (alkalinity / 5000.0),
        0.0,
        1.5,
    )
    vfa_accumulation = np.clip(
        vfa * (1.0 - pfas_degradation_pct / 100 * 0.3) + rng.normal(0, 50, n),
        50.0,
        2000.0,
    )
    orp_drift = rng.uniform(-50.0, 50.0, n)
    current_instability = np.clip(
        rng.uniform(0.0, 0.3, n) + 0.1 * (olr / 6.0),
        0.0,
        0.5,
    )

    # --- Energy input and composite score ---------------------------------
    energy_input = voltage * current * 24 / 1000
    energy_peak = energy_input.max() if n else 1.0  # same empty-array guard
    ai_score = (
        0.40 * (pfas_degradation_pct / 65.0)
        + 0.30 * (defluorination_pct / 55.0)
        - 0.15 * short_chain_formation
        - 0.10 * (energy_input / energy_peak)
        - 0.05 * current_instability
    )
    ai_score = np.clip(ai_score, 0, 1)

    # A record is flagged (1) when any single stability threshold is exceeded.
    instability_flag = (
        (ph_drop > 0.8) | (vfa_accumulation > 1200) | (current_instability > 0.35)
    ).astype(int)

    experiment_id = np.arange(1, n + 1)

    df = pd.DataFrame({
        "experiment_id": experiment_id,
        "OLR_kg_m3_d": np.round(olr, 3),
        "HRT_days": np.round(hrt, 1),
        "pH": np.round(ph, 2),
        "temperature_C": np.round(temperature, 1),
        "COD_mg_L": np.round(cod, 0),
        "SCOD_mg_L": np.round(scod, 0),
        "VFA_mg_L": np.round(vfa, 0),
        "alkalinity_mg_CaCO3_L": np.round(alkalinity, 0),
        "voltage_V": np.round(voltage, 3),
        "current_A": np.round(current, 3),
        "current_density_A_m2": np.round(current_density, 2),
        "conductivity_mS_cm": np.round(conductivity, 2),
        "electrode_area_m2": np.round(electrode_area, 3),
        "electrode_spacing_cm": np.round(electrode_spacing, 2),
        "initial_PFAS_ug_L": np.round(initial_pfas, 1),
        "final_PFAS_ug_L": np.round(final_pfas, 1),
        "PFAS_degradation_pct": np.round(pfas_degradation_pct, 2),
        "PFAS_adsorbed_sludge_ug_L": np.round(pfas_adsorbed_sludge, 2),
        "PFAS_adsorbed_electrode_ug_L": np.round(pfas_adsorbed_electrode, 2),
        "fluoride_release_mg_L": np.round(fluoride_release, 2),
        "defluorination_pct": np.round(defluorination_pct, 2),
        "short_chain_formation_ratio": np.round(short_chain_formation, 4),
        "pH_drop": np.round(ph_drop, 3),
        "VFA_accumulation_mg_L": np.round(vfa_accumulation, 0),
        "ORP_drift_mV": np.round(orp_drift, 1),
        "current_instability_index": np.round(current_instability, 4),
        "energy_input_kWh_d": np.round(energy_input, 4),
        "AI_score": np.round(ai_score, 4),
        "instability_flag": instability_flag,
    })
    return df
|
|
|
|
def generate_mass_balance_data(df: pd.DataFrame) -> pd.DataFrame:
    """Compute PFAS mass balance for each experiment.

    The initial PFAS of every row is split into: remaining in water, adsorbed
    on sludge, adsorbed on the electrode, short-chain products
    (``initial * short_chain_formation_ratio``) and a mineralized remainder.
    The remainder is floored at zero, so when the other compartments already
    exceed the initial amount the reported closure is above 100 %.

    Args:
        df: Frame holding the columns ``experiment_id``,
            ``initial_PFAS_ug_L``, ``final_PFAS_ug_L``,
            ``PFAS_adsorbed_sludge_ug_L``, ``PFAS_adsorbed_electrode_ug_L``
            and ``short_chain_formation_ratio``.

    Returns:
        A new DataFrame (same index as ``df``) with one row per experiment,
        the five compartments rounded to 2 decimals, and
        ``mass_balance_closure_pct``.

    Raises:
        KeyError: If any required column is absent; the message lists all of
            the missing columns at once.
    """
    required = (
        "experiment_id",
        "initial_PFAS_ug_L",
        "final_PFAS_ug_L",
        "PFAS_adsorbed_sludge_ug_L",
        "PFAS_adsorbed_electrode_ug_L",
        "short_chain_formation_ratio",
    )
    missing = [column for column in required if column not in df.columns]
    if missing:
        raise KeyError(f"df is missing required column(s): {missing}")

    initial = df["initial_PFAS_ug_L"]
    remaining = df["final_PFAS_ug_L"]
    adsorbed_sludge = df["PFAS_adsorbed_sludge_ug_L"]
    adsorbed_electrode = df["PFAS_adsorbed_electrode_ug_L"]
    short_chain_products = initial * df["short_chain_formation_ratio"]

    # Whatever is not accounted for by the other compartments is treated as
    # mineralized; a negative remainder is not meaningful, so floor it at 0.
    mineralized = (
        initial
        - remaining
        - adsorbed_sludge
        - adsorbed_electrode
        - short_chain_products
    ).clip(lower=0)

    # Closure uses the unrounded compartments so rounding error does not leak
    # into the percentage.
    accounted = (
        remaining
        + adsorbed_sludge
        + adsorbed_electrode
        + short_chain_products
        + mineralized
    )
    closure_pct = accounted / initial * 100

    return pd.DataFrame({
        "experiment_id": df["experiment_id"],
        "initial_PFAS_ug_L": initial,
        "remaining_in_water_ug_L": remaining.round(2),
        "adsorbed_sludge_ug_L": adsorbed_sludge.round(2),
        "adsorbed_electrode_ug_L": adsorbed_electrode.round(2),
        "short_chain_products_ug_L": short_chain_products.round(2),
        "mineralized_PFAS_ug_L": mineralized.round(2),
        "mass_balance_closure_pct": closure_pct.round(2),
    })
|
|
|
|
# Script entry point: build the synthetic experiment table and its mass
# balance, then write both as CSV files under ./data.
if __name__ == "__main__":
    from pathlib import Path

    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the output folder exists before writing.
    output_dir = Path("data")
    output_dir.mkdir(parents=True, exist_ok=True)

    df = generate_sbead_dataset(120)
    df.to_csv(output_dir / "sbead_experiments.csv", index=False)
    mb = generate_mass_balance_data(df)
    mb.to_csv(output_dir / "mass_balance.csv", index=False)
    print(f"Generated {len(df)} experiments and mass balance data.")
|
|