""" EV Camper Mock Data Generator ============================== Generates realistic power and water CSV exports following the discussed schema, derived from data.json lookup tables. Column naming convention: *_kW instantaneous power (flow files: 1SEC, 1MIN, 15MIN) *_kWh accumulated energy (energy files: 1H, 1DAY) *_V voltage (instantaneous or averaged) *_Ah amp-hours (battery state snapshot) *_Pct percentage (battery / tank level) *_Lpm litres per minute (water flow files: 1MIN, 15MIN) *_L litres (water energy files: 1H, 1DAY, and tank level snapshots) Output ZIP contains: power/ -> 1SEC.csv, 1MIN.csv, 15MIN.csv, 1H.csv, 1DAY.csv water/ -> 1MIN.csv, 15MIN.csv, 1H.csv, 1DAY.csv Usage: python generate_mock_data.py [--config data.json] [--out output] [--seed 42] python generate_mock_data.py --user Glamper --people 2 --days 5 --temp Hot """ import json import csv import os import math import random import zipfile import argparse from datetime import datetime, timedelta from pathlib import Path # --------------------------------------------------------------------------- # CONSTANTS # --------------------------------------------------------------------------- GAL_TO_LITRES = 3.78541 MINS_PER_DAY = 1440 SOLAR_PANEL_AREA_M2 = 1.8 # ~330W panel footprint BATTERY_NOMINAL_V = 48.0 # for kWh <-> Ah conversion # --------------------------------------------------------------------------- # DATA LOADER # --------------------------------------------------------------------------- def load_data(path: str) -> dict: with open(path) as f: return json.load(f) # --------------------------------------------------------------------------- # DAILY BUDGET CALCULATORS # --------------------------------------------------------------------------- def calc_power_budget(data: dict, user: str, people: int, temp: str, hvac_hrs: float) -> dict: """ Returns expected kWh per day broken down by circuit + solar generation. Derived from component voltage x amps tables and user profile runtimes. """ p = data["lookups"]["user_profiles"]["profiles"][user] tb = p["time_based"] cb = p["count_based"] def watts(v, a): return v * a # HVAC hvac_kwh = data["lookups"]["hvac_energy_wh_day"][temp] / 1000.0 # Lighting (12V 12A) lighting_kwh = watts(12, 12) * (tb["mins_per_day"].get("living_lighting", 300) / 60) / 1000 # Devices: always-on sensors/compute + active electronics devices_kwh = ( 5 * 12 * 24 # sensor idle + 1.25 * 12 * 24 # compute idle + 0.5 * 120 * tb["hrs_per_day"].get("living_electronics", 16) ) / 1000 # Fridge (24h) fridge_kwh = watts(120, 0.5137) * 24 / 1000 # Water pump meals = cb["meals_per_day"]["cooking"] shower_cyc = cb["cycles_per_day_per_person"]["shower"] * people toilet_cyc = cb["cycles_per_day_per_person"]["toilet"] * people pump_mins = (tb["mins_per_meal"].get("cooking_pump", 3.625) * meals + tb["mins_per_cycle"].get("shower_water_pump", 6) * shower_cyc + 1.0 * toilet_cyc) water_pump_kwh = watts(12, 8.5) * (pump_mins / 60) / 1000 # Cooking (stove + microwave + water heater) cooking_kwh = ( watts(240, 12.5) * (tb["mins_per_meal"].get("cooking_stove", 15) * meals / 60) + watts(120, 8.333) * (tb["mins_per_meal"].get("cooking_microwave", 3.5) * meals / 60) + watts(240, 33.4) * ((tb["mins_per_meal"].get("cooking_water_heater", 3) * meals + tb["mins_per_cycle"].get("shower_duration", 6) * shower_cyc) / 60) ) / 1000 # Inverter load (TV as representative AC load) inverter_kwh = watts(120, 0.7) * (tb["mins_per_day"].get("living_tv", 60) / 60) / 1000 # Solar generation sol = data["lookups"]["solar"] humidity = data["inputs"]["params"]["humidity"]["value"] sunlight = data["inputs"]["params"]["sunlight"]["value"] insolation = sol["insolation_wh_m2_day"][temp][humidity] num_panels = (data["trailer_specs"]["specs"]["num_solar_panels"]["value"] if "num_solar_panels" in data["trailer_specs"]["specs"] else 35) solar_kwh = (insolation * num_panels * SOLAR_PANEL_AREA_M2 * sol["system_loss_factor"] * sol["tilt_factor"][temp] * sol["sunlight_factor"][sunlight]) / 1_000 solar_kwh = min(solar_kwh, data["trailer_specs"]["specs"]["solar_capacity_kw"]["value"] * 6) return { "solar_kwh": round(solar_kwh, 3), "hvac_kwh": round(hvac_kwh, 3), "lighting_kwh": round(lighting_kwh, 3), "devices_kwh": round(devices_kwh, 3), "fridge_kwh": round(fridge_kwh, 3), "water_pump_kwh": round(water_pump_kwh, 3), "cooking_kwh": round(cooking_kwh, 3), "inverter_kwh": round(inverter_kwh, 3), } def calc_water_budget(data: dict, user: str, people: int) -> dict: """Returns expected litres per day per circuit, converted from profile gallon tables.""" p = data["lookups"]["user_profiles"]["profiles"][user] vb = p["volume_based"] cb = p["count_based"] meals = cb["meals_per_day"]["cooking"] shower_cyc = cb["cycles_per_day_per_person"]["shower"] * people toilet_cyc = cb["cycles_per_day_per_person"]["toilet"] * people def g2l(g): return round(g * GAL_TO_LITRES, 3) return { "shower_L": g2l(vb["gal_per_cycle"]["shower_water"] * shower_cyc), "toilet_L": g2l((vb["gal_per_cycle"]["toilet_sink_water"] + vb["gal_per_cycle"]["toilet_gravity_flush"]) * toilet_cyc), "kitchen_L": g2l((vb["gal_per_meal"].get("cooking_kitchen_faucet", 1.5) + vb["gal_per_meal"].get("cooking_dishwasher_water", 1.25)) * meals + vb["gal_per_day"].get("living_cleaning", 0.625) + vb["gal_per_day"].get("living_drinking_water", 0.75) * people), } # --------------------------------------------------------------------------- # TIME-OF-DAY SHAPE FUNCTIONS # --------------------------------------------------------------------------- def solar_curve(hour: float) -> float: if hour < 6 or hour > 18: return 0.0 return max(0.0, math.sin(math.pi * (hour - 6) / 12)) def activity_curve(hour: float) -> float: return max(0.01, math.exp(-0.5 * ((hour - 8.0) / 1.2) ** 2) + math.exp(-0.5 * ((hour - 19.5) / 1.5) ** 2) * 0.8) def hvac_curve(hour: float, temp: str) -> float: if temp == "Hot": return (0.4 + 0.6 * math.sin(math.pi * max(0, hour - 9) / 12) if 9 <= hour <= 21 else 0.2) if temp == "Cold": return 0.7 + 0.3 * (1 - solar_curve(hour)) return 0.4 + 0.1 * math.sin(math.pi * hour / 24) def water_event_curve(hour: float) -> float: return max(0.0, math.exp(-0.5 * ((hour - 7.5) / 1.0) ** 2) + math.exp(-0.5 * ((hour - 19.0) / 1.0) ** 2) * 0.6) def jitter(rng: random.Random, scale: float = 0.05) -> float: return 1.0 + rng.gauss(0, scale) # --------------------------------------------------------------------------- # MINUTE-LEVEL SERIES BUILDERS # --------------------------------------------------------------------------- def build_power_minutes(budget: dict, temp: str, battery_cap_kwh: float, start: datetime, num_days: int, rng: random.Random) -> list[dict]: """ 1-minute power rows. Flow columns: *_kW | State columns: *_Ah, *_Pct, *_V """ solar_cap_kw = budget["solar_kwh"] / 6.0 hvac_mean = budget["hvac_kwh"] / 24 lighting_mean = budget["lighting_kwh"] / (300 / 60) devices_mean = budget["devices_kwh"] / 24 fridge_mean = budget["fridge_kwh"] / 24 pump_mean = budget["water_pump_kwh"] / 2 cooking_mean = budget["cooking_kwh"] / 1.5 inverter_mean = budget["inverter_kwh"] / max(budget["inverter_kwh"] / (0.7 * 120 / 1000), 0.1) battery_kwh = battery_cap_kwh * 0.80 rows = [] for m in range(num_days * MINS_PER_DAY): ts = start + timedelta(minutes=m) hour = ts.hour + ts.minute / 60.0 solar_kw = round(max(0, solar_cap_kw * solar_curve(hour) * rng.uniform(0.85, 1.05)), 4) ac = activity_curve(hour) hvac_kw = round(max(0, hvac_mean * hvac_curve(hour, temp) * jitter(rng, 0.08)), 4) lighting_kw = round((max(0, lighting_mean * ac * jitter(rng, 0.05)) if 6 <= hour <= 23 else 0.002), 4) devices_kw = round(max(0, devices_mean * jitter(rng, 0.04)), 4) fridge_kw = round(max(0, fridge_mean * (0.7 + 0.6 * rng.random()) * jitter(rng, 0.03)), 4) pump_kw = round(max(0, pump_mean * water_event_curve(hour) * jitter(rng, 0.15)), 4) cooking_kw = round(max(0, cooking_mean * ac * jitter(rng, 0.20)) if ac > 0.3 else 0.0, 4) inverter_kw = round(max(0, inverter_mean * ac * jitter(rng, 0.10)), 4) total_load = hvac_kw + lighting_kw + devices_kw + fridge_kw + pump_kw + cooking_kw + inverter_kw net = solar_kw - total_load shore_kw = 0.0 if net < 0 and battery_kwh < abs(net) / 60 * 0.95: shore_kw = round(abs(net) * 1.05, 4) net = 0.0 battery_flow_kw = round(net, 4) battery_kwh = max(0, min(battery_cap_kwh, battery_kwh + battery_flow_kw / 60)) unmetered_kw = round(max(0, solar_kw + shore_kw + (abs(battery_flow_kw) if battery_flow_kw < 0 else 0) - total_load - (battery_flow_kw if battery_flow_kw > 0 else 0) ), 4) rows.append({ "Time": ts.strftime("%Y-%m-%dT%H:%M:%SZ"), "Solar_Flow_kW": solar_kw, "Shore_Flow_kW": shore_kw, "Battery_Flow_kW": battery_flow_kw, "HVAC_Flow_kW": hvac_kw, "Lighting_Flow_kW": lighting_kw, "Devices_Flow_kW": devices_kw, "Fridge_Flow_kW": fridge_kw, "WaterPump_Flow_kW": pump_kw, "Cooking_Flow_kW": cooking_kw, "Inverter_Flow_kW": inverter_kw, "Unmetered_Flow_kW": unmetered_kw, "Battery_Level_Ah": round(battery_kwh * 1000 / BATTERY_NOMINAL_V, 1), "Battery_Level_Pct": round(battery_kwh / battery_cap_kwh * 100, 2), "Solar_Voltage_V": round(rng.uniform(36, 52) if solar_kw > 0 else 0.0, 1), "Battery_Voltage_V": round(46 + (battery_kwh / battery_cap_kwh) * 6 + rng.gauss(0, 0.2), 2), }) return rows def build_water_minutes(budget: dict, fresh_cap_L: float, grey_cap_L: float, black_cap_L: float, start: datetime, num_days: int, rng: random.Random) -> list[dict]: """ 1-minute water rows. Flow columns: *_Lpm | State columns: *_L Black tank level is derived entirely from Toilet_Flow_Lpm: 100% of toilet flush volume enters the black tank. Grey tank receives shower + kitchen waste only (90% of flow, 10% evaporation/splash). """ shower_rate = budget["shower_L"] / MINS_PER_DAY toilet_rate = budget["toilet_L"] / MINS_PER_DAY kitchen_rate = budget["kitchen_L"] / MINS_PER_DAY fresh_L = fresh_cap_L * 0.95 grey_L = 0.0 black_L = 0.0 # starts empty; fills from toilet flow rows = [] for m in range(num_days * MINS_PER_DAY): ts = start + timedelta(minutes=m) hour = ts.hour + ts.minute / 60.0 wc = water_event_curve(hour) # Tank-fill event at 07:00 on day 1 only inlet_Lpm = round(rng.uniform(8, 12), 3) if m == 420 else 0.0 shower_Lpm = round(max(0, shower_rate * wc * 2.5 * jitter(rng, 0.15)), 4) kitchen_Lpm = round(max(0, kitchen_rate * wc * 2.0 * jitter(rng, 0.10)), 4) toilet_Lpm = round(max(0, toilet_rate * wc * 2.0 * jitter(rng, 0.20)), 4) pump_Lpm = shower_Lpm + kitchen_Lpm + toilet_Lpm if fresh_L < pump_Lpm: pump_Lpm = max(0, fresh_L) shower_Lpm = round(pump_Lpm * 0.60, 4) kitchen_Lpm = round(pump_Lpm * 0.25, 4) toilet_Lpm = round(pump_Lpm * 0.15, 4) unmetered_Lpm = round(max(0, pump_Lpm - shower_Lpm - kitchen_Lpm - toilet_Lpm), 4) # Update tank levels fresh_L = max(0, min(fresh_cap_L, fresh_L + inlet_Lpm - pump_Lpm)) grey_L = min(grey_cap_L, grey_L + (shower_Lpm + kitchen_Lpm) * 0.9) black_L = min(black_cap_L, black_L + toilet_Lpm) # 100% of toilet → black tank rows.append({ "Time": ts.strftime("%Y-%m-%dT%H:%M:%SZ"), "Inlet_Flow_Lpm": inlet_Lpm, "Pump_Flow_Lpm": round(pump_Lpm, 4), "Shower_Flow_Lpm": shower_Lpm, "Kitchen_Flow_Lpm": kitchen_Lpm, "Toilet_Flow_Lpm": toilet_Lpm, "Unmetered_Flow_Lpm": unmetered_Lpm, "FreshTank_Level_L": round(fresh_L, 2), "GreyTank_Level_L": round(grey_L, 2), "BlackTank_Level_L": round(black_L, 2), }) return rows # --------------------------------------------------------------------------- # RESAMPLING # --------------------------------------------------------------------------- def resample_power(rows: list[dict], interval_mins: int, mode: str = "mean") -> list[dict]: """ mode='mean' → kW (15MIN flow file) mode='sum' → kWh (1H, 1DAY energy files) [kW × 1 min / 60 = kWh] """ CIRCUITS = ["HVAC", "Lighting", "Devices", "Fridge", "WaterPump", "Cooking", "Inverter", "Unmetered"] out = [] for i in range(0, len(rows), interval_mins): bucket = rows[i: i + interval_mins] if not bucket: continue first, last = bucket[0], bucket[-1] def mean(col): return round(sum(r[col] for r in bucket) / len(bucket), 4) def to_kwh(col): return round(sum(r[col] for r in bucket) / 60, 6) def avg_v(col): return round(sum(r[col] for r in bucket) / len(bucket), 2) row = {"Time": first["Time"]} if mode == "mean": row["Solar_Flow_kW"] = mean("Solar_Flow_kW") row["Shore_Flow_kW"] = mean("Shore_Flow_kW") row["Battery_Flow_kW"] = mean("Battery_Flow_kW") for c in CIRCUITS: row[f"{c}_Flow_kW"] = mean(f"{c}_Flow_kW") row["Battery_Level_Ah"] = last["Battery_Level_Ah"] row["Battery_Level_Pct"] = last["Battery_Level_Pct"] row["Solar_Voltage_V"] = avg_v("Solar_Voltage_V") row["Battery_Voltage_V"] = avg_v("Battery_Voltage_V") else: row["Solar_Total_kWh"] = to_kwh("Solar_Flow_kW") row["Shore_Total_kWh"] = to_kwh("Shore_Flow_kW") # Battery split: charged (+) and discharged (-) as separate positive columns row["Battery_Charged_kWh"] = round( sum(r["Battery_Flow_kW"] for r in bucket if r["Battery_Flow_kW"] > 0) / 60, 6) row["Battery_Discharged_kWh"] = round( sum(abs(r["Battery_Flow_kW"]) for r in bucket if r["Battery_Flow_kW"] < 0) / 60, 6) for c in CIRCUITS: row[f"{c}_Total_kWh"] = to_kwh(f"{c}_Flow_kW") row["Battery_Level_Ah"] = last["Battery_Level_Ah"] row["Battery_Level_Pct"] = last["Battery_Level_Pct"] row["Solar_Voltage_Avg_V"] = avg_v("Solar_Voltage_V") row["Battery_Voltage_Avg_V"] = avg_v("Battery_Voltage_V") out.append(row) return out def resample_water(rows: list[dict], interval_mins: int, mode: str = "mean", fresh_cap_L: float = 378.5, grey_cap_L: float = 189.3, black_cap_L: float = 170.3) -> list[dict]: """ mode='mean' → Lpm (15MIN flow file) mode='sum' → L (1H, 1DAY energy files) [Lpm × 1 min = L] Black tank level is a snapshot carried from 1MIN rows (derived from toilet flow). """ CIRCUITS = ["Inlet", "Pump", "Shower", "Kitchen", "Toilet", "Unmetered"] out = [] for i in range(0, len(rows), interval_mins): bucket = rows[i: i + interval_mins] if not bucket: continue first, last = bucket[0], bucket[-1] def mean_lpm(col): return round(sum(r[col] for r in bucket) / len(bucket), 4) def to_L(col): return round(sum(r[col] for r in bucket), 4) # Lpm × 1 min = L row = {"Time": first["Time"]} if mode == "mean": for c in CIRCUITS: row[f"{c}_Flow_Lpm"] = mean_lpm(f"{c}_Flow_Lpm") row["FreshTank_Level_L"] = last["FreshTank_Level_L"] row["GreyTank_Level_L"] = last["GreyTank_Level_L"] row["BlackTank_Level_L"] = last["BlackTank_Level_L"] else: for c in CIRCUITS: row[f"{c}_Total_L"] = to_L(f"{c}_Flow_Lpm") row["FreshTank_Level_L"] = last["FreshTank_Level_L"] row["FreshTank_Level_Pct"] = round(last["FreshTank_Level_L"] / fresh_cap_L * 100, 2) row["GreyTank_Level_L"] = last["GreyTank_Level_L"] row["GreyTank_Level_Pct"] = round(last["GreyTank_Level_L"] / grey_cap_L * 100, 2) row["BlackTank_Level_L"] = last["BlackTank_Level_L"] row["BlackTank_Level_Pct"] = round(last["BlackTank_Level_L"] / black_cap_L * 100, 2) out.append(row) return out # --------------------------------------------------------------------------- # CSV WRITER # --------------------------------------------------------------------------- def write_csv(path: str, rows: list[dict]): if not rows: return os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w", newline="") as f: w = csv.DictWriter(f, fieldnames=rows[0].keys()) w.writeheader() w.writerows(rows) print(f" Wrote {len(rows):>6,} rows -> {path}") # --------------------------------------------------------------------------- # MAIN # --------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser(description="EV Camper Mock Data Generator") parser.add_argument("--config", default="data.json", help="Path to data.json") parser.add_argument("--out", default="output", help="Output directory") parser.add_argument("--seed", type=int, default=42, help="Random seed") parser.add_argument("--user", default=None, help="Glamper / Typical / Expert") parser.add_argument("--people", type=int, default=None, help="Number of occupants") parser.add_argument("--days", type=int, default=None, help="Trip duration in days") parser.add_argument("--temp", default=None, help="Hot / Temperate / Cold") parser.add_argument("--start", default="2026-02-18T00:00:00", help="Trip start (ISO datetime)") args = parser.parse_args() rng = random.Random(args.seed) data = load_data(args.config) params = data["inputs"]["params"] specs = data["trailer_specs"]["specs"] user = args.user or params["user_type"]["value"] people = args.people or params["num_people"]["value"] days = args.days or params["trip_duration_days"]["value"] temp = args.temp or params["temperature"]["value"] hvac_hrs = params["hvac_runtime_hrs"]["value"] bat_cap_kwh = specs["battery_capacity_kwh"]["value"] fresh_cap_gal = specs["freshwater_capacity_gal"]["value"] grey_cap_gal = specs["greywater_capacity_gal"]["value"] black_cap_gal = specs["blackwater_capacity_gal"]["value"] fresh_cap_L = fresh_cap_gal * GAL_TO_LITRES grey_cap_L = grey_cap_gal * GAL_TO_LITRES black_cap_L = black_cap_gal * GAL_TO_LITRES start = datetime.fromisoformat(args.start) print(f"\n{'='*60}") print(f" EV Camper Mock Data Generator") print(f"{'='*60}") print(f" Profile : {user} | People: {people} | Days: {days}") print(f" Temp : {temp} | Start : {start.strftime('%Y-%m-%d')}") print(f" Battery : {bat_cap_kwh} kWh | Fresh: {fresh_cap_gal} gal | Black: {black_cap_gal} gal") print(f"{'='*60}\n") pw = calc_power_budget(data, user, people, temp, hvac_hrs) wat = calc_water_budget(data, user, people) print(" Daily Power Budget:") for k, v in pw.items(): print(f" {k:<22} {v:.3f} kWh") print(f"\n Daily Water Budget:") for k, v in wat.items(): print(f" {k:<22} {v:.1f} L") print() print(" Generating 1-minute base series...") power_mins = build_power_minutes(pw, temp, bat_cap_kwh, start, days, rng) water_mins = build_water_minutes(wat, fresh_cap_L, grey_cap_L, black_cap_L, start, days, rng) out = Path(args.out) # ------------------------------------------------------------------ # POWER FILES # ------------------------------------------------------------------ pw_dir = out / "power" print(" Resampling power files...") # 1SEC: expand first 3h of 1-min rows to per-second with jitter FLOW_COLS_KW = ["Solar_Flow_kW", "Shore_Flow_kW", "Battery_Flow_kW", "HVAC_Flow_kW", "Lighting_Flow_kW", "Devices_Flow_kW", "Fridge_Flow_kW", "WaterPump_Flow_kW", "Cooking_Flow_kW", "Inverter_Flow_kW", "Unmetered_Flow_kW"] sec_rows = [] for row in power_mins[:180]: ts_base = datetime.strptime(row["Time"], "%Y-%m-%dT%H:%M:%SZ") for s in range(60): sr = dict(row) sr["Time"] = (ts_base + timedelta(seconds=s)).strftime("%Y-%m-%dT%H:%M:%SZ") for col in FLOW_COLS_KW: sr[col] = round(max(0, row[col] * jitter(rng, 0.03)), 4) sec_rows.append(sr) write_csv(str(pw_dir / "1SEC.csv"), sec_rows) write_csv(str(pw_dir / "1MIN.csv"), power_mins) write_csv(str(pw_dir / "15MIN.csv"), resample_power(power_mins, 15, "mean")) write_csv(str(pw_dir / "1H.csv"), resample_power(power_mins, 60, "sum")) write_csv(str(pw_dir / "1DAY.csv"), resample_power(power_mins, MINS_PER_DAY,"sum")) # ------------------------------------------------------------------ # WATER FILES # ------------------------------------------------------------------ wt_dir = out / "water" print(" Resampling water files...") write_csv(str(wt_dir / "1MIN.csv"), water_mins) write_csv(str(wt_dir / "15MIN.csv"), resample_water(water_mins, 15, "mean", fresh_cap_L, grey_cap_L, black_cap_L)) write_csv(str(wt_dir / "1H.csv"), resample_water(water_mins, 60, "sum", fresh_cap_L, grey_cap_L, black_cap_L)) write_csv(str(wt_dir / "1DAY.csv"), resample_water(water_mins, MINS_PER_DAY, "sum", fresh_cap_L, grey_cap_L, black_cap_L)) if __name__ == "__main__": main()