import os
import re
import sys
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

# Add the parent directory to sys.path before the project-local imports below.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from solar_sys_environment import SolarSys
from PG.trainer.pg import PGAgent


def _moving_avg(series, window):
    """Return the centered rolling mean of *series* as a NumPy array.

    ``min_periods=1`` lets the window shrink at both ends, so the output has
    the same length as the input and the edges are not padded with NaN.
    """
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()


def _save_ma_plot(x, y, *, label, ylabel, title, out_path, color=None):
    """Plot one smoothed training curve against the episode axis and save it to *out_path*."""
    line_kwargs = {"linewidth": 2, "label": label}
    # Only forward a colour when one was requested so the default cycle is used otherwise.
    if color is not None:
        line_kwargs["color"] = color

    plt.figure(figsize=(8, 5))
    plt.plot(x, y, **line_kwargs)
    plt.xlabel("Episode")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path, dpi=200)
    plt.close()


def main() -> None:
    """Train one independent PGAgent per house on the SolarSys environment.

    The function:

    1. builds the environment, prints its spaces and performs one random
       step as a smoke test;
    2. creates a timestamped output directory with ``logs`` and ``plots``
       sub-directories;
    3. runs ``num_episodes`` episodes, updating every agent once per episode
       and saving per-agent weights whenever the mean episode reward improves;
    4. saves the final weights, the reward arrays (``.npy``), four
       moving-average plots and a CSV performance log.

    All hyper-parameters are local constants; the function takes no arguments
    and returns nothing.
    """
    STATE_TO_RUN = "pennsylvania"

    DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
    num_episodes = 10000
    # NOTE(review): this interval is larger than num_episodes, so the periodic
    # checkpoint branch below never fires with the current settings.
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="15T",
    )

    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # --- Smoke test: one reset and one random step before training starts ---
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1),
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # --- Output directories ---
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"pg_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # --- One independent agent per house ---
    pg_agents = [
        PGAgent(
            state_dim=local_state_dim,
            action_dim=action_dim,
            lr=2e-4,
            gamma=0.95,
            critic_loss_coef=0.5,
        )
        for _ in range(num_agents)
    ]

    # --- Bookkeeping ---
    episode_rewards = []        # mean reward across agents, one entry per episode
    episode_total_rewards = []  # summed reward across agents, one entry per episode
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9

    training_start_time = time.time()
    total_steps_global = 0
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # --- Training loop ---
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # The previous episode's metrics are read *after* reset(), exactly as
        # before. NOTE(review): presumably the environment finalises them
        # during reset() -- confirm against SolarSys.
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            actions = np.array(
                [agent.select_action(obs[i]) for i, agent in enumerate(pg_agents)],
                dtype=np.float32,
            )

            next_obs_list, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs_list, dtype=np.float32)

            for i, agent in enumerate(pg_agents):
                agent.rewards.append(rewards[i])
                agent.dones.append(done)

            total_reward += rewards
            obs = next_obs
            step_count += 1
            total_steps_global += 1

            # Resolve the optional battery fields once so the step log and the
            # per-agent lists share the same fallback; indexing ``info``
            # directly raised KeyError whenever a key was absent.
            charge_amount = info.get("charge_amount", np.zeros(num_agents))
            discharge_amount = info.get("discharge_amount", np.zeros(num_agents))

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": charge_amount,
                "discharge_amount": discharge_amount,
            })

            for i in range(num_agents):
                episode_charges[i].append(charge_amount[i])
                episode_discharges[i].append(discharge_amount[i])

            # Hard cap in case the environment never reports ``done``.
            if step_count >= max_steps:
                break

        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)

        # Report an aggregate every ``window_size`` episodes.
        if len(episode_rewards) % window_size == 0:
            block_sum = sum(episode_total_rewards[-window_size:])
            block_total_rewards.append(block_sum)

            block_mean = sum(episode_rewards[-window_size:]) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx - 1) * window_size + 1}–{block_idx * window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

        # Baseline: grid import without P2P trading priced at each step's grid price.
        baseline_cost = np.sum([
            np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
            for entry in day_logs
        ])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # The epsilon guards against division by zero when the baseline is 0.
        cost_reduction = (baseline_cost - actual_cost) / (baseline_cost + 1e-8)

        # One policy update per agent per episode.
        for agent in pg_agents:
            agent.update()

        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            for i, agent in enumerate(pg_agents):
                agent.save(os.path.join(logs_dir, f"best_model_agent_{i}.pth"))

        if episode % checkpoint_interval == 0:
            for i, agent in enumerate(pg_agents):
                agent.save(os.path.join(logs_dir, f"checkpoint_{episode}_agent_{i}.pth"))

        episode_duration = time.time() - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs]),
        })

        if episode % 100 == 0:
            # A negative slice already copes with fewer than 100 entries.
            avg_reward_last_100 = np.mean(episode_rewards[-100:])
            print(f" → Average reward (last 100 episodes): {avg_reward_last_100:.3f}")

    # Metrics of the last episode (no further reset() follows it).
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    total_training_time = time.time() - training_start_time

    # --- Persist models and raw reward arrays ---
    print("\nSaving final models...")
    for i, agent in enumerate(pg_agents):
        agent.save(os.path.join(logs_dir, f"final_model_agent_{i}.pth"))

    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # --- Combine the reward log with the environment's per-episode metrics ---
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    # errors="ignore": a missing time-series column must not abort the run
    # after training has already finished.
    df_perf_scalars = df_perf_log.drop(
        columns=[
            'degradation_cost_over_time',
            'cost_savings_over_time',
            'grid_reduction_over_time',
        ],
        errors="ignore",
    )
    df_final_log = pd.merge(df_rewards_log, df_perf_scalars, on="Episode")

    # --- Moving-average plots ---
    ma_window = 300
    # Take the x axis from the merged frame so x and y always have equal
    # length, even if the inner merge dropped an episode.
    episodes = df_final_log["Episode"].to_numpy()

    _save_ma_plot(
        episodes,
        _moving_avg(df_final_log["Mean_Reward"], ma_window),
        label=f"Mean Reward MA (win={ma_window})",
        ylabel="Mean Reward",
        title="PG: Mean Reward Moving Average",
        out_path=os.path.join(plots_dir, "mean_reward_ma.png"),
    )
    _save_ma_plot(
        episodes,
        _moving_avg(df_final_log["Total_Reward"], ma_window),
        label=f"Total Reward MA (win={ma_window})",
        ylabel="Total Reward",
        title="PG: Total Reward Moving Average",
        out_path=os.path.join(plots_dir, "total_reward_ma.png"),
    )
    _save_ma_plot(
        episodes,
        _moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window),
        label="Cost Reduction MA (%)",
        ylabel="Cost Reduction (%)",
        title="PG: Cost Reduction Moving Average",
        out_path=os.path.join(plots_dir, "cost_reduction_ma.png"),
    )
    _save_ma_plot(
        episodes,
        _moving_avg(df_final_log["battery_degradation_cost_total"], ma_window),
        label=f"Degradation Cost MA (win={ma_window})",
        ylabel="Total Degradation Cost ($)",
        title="PG: Battery Degradation Cost Moving Average",
        out_path=os.path.join(plots_dir, "degradation_cost_ma.png"),
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # --- CSV log; the total training time is appended as an extra row ---
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time,
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print(f"Device used: {pg_agents[0].device}")
    print("="*50)


# Start training only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()