import sys import os import json import random import requests # Ensure we can import from parent directory sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from env.client import GlobalCrisisEnv from agent.planner import decide_action # Configuration TARGET_PER_TASK = 60 EXPERT_RATIO = 0.7 NEAR_EXPERT_RATIO = 0.3 WASTE_THRESHOLD = 0.1 OUTPUT_PATH = "logs/expert_trajectories.jsonl" TASKS = ["easy", "medium", "hard"] MAX_FAIL_SAFES = 200 # Task-specific scoring ceilings TASK_THRESHOLDS = { "easy": 0.150, "medium": 0.175, "hard": 0.128 } SCORE_MIN = 0.115 SYSTEM_PROMPT = ( "You are a Geopolitical Crisis Logistics AI. Your goal is to stabilize " "hospital, emergency, and transport demands with minimal fuel waste." ) def build_llama_instruction(state: dict, action: dict, reasoning: str) -> dict: user_prompt = ( "Given the current crisis state, allocate fuel optimally to " "maximize stability while minimizing waste.\n\n" "State: " + json.dumps(state) + "\n" "What is the optimal fuel allocation for this step?" ) assistant_response = { "reasoning": reasoning, "action": action } return { "system": SYSTEM_PROMPT, "user": user_prompt, "assistant": json.dumps(assistant_response) } def run_expert_generation(): if not os.path.exists("logs"): os.makedirs("logs", exist_ok=True) counts = {t: {"expert": 0, "near_expert": 0} for t in TASKS} initial_demands = {"hospital": 40, "emergency": 30, "transport": 20, "residential": 15} starting_fuels = {"easy": 160, "medium": 120, "hard": 80} print("Phase 2A Re-Balancing Started") with open(OUTPUT_PATH, "w", encoding="utf-8") as f: pass try: with GlobalCrisisEnv() as env: for task in TASKS: consecutive_failures = 0 current_expert_threshold = TASK_THRESHOLDS[task] target_experts = int(60 * 0.7) target_nears = int(60 * 0.3) while (counts[task]["expert"] + counts[task]["near_expert"]) < 60: if counts[task]["expert"] < target_experts: noise_level = 0.0 else: noise_level = 0.1 if consecutive_failures >= 200: current_expert_threshold = max(0.12, current_expert_threshold - 0.005) print("Relaxing threshold for " + task) consecutive_failures = 0 try: obs = env.reset(task_id=task) except Exception as e: print("Connection Error: " + str(e)) return initial_fuel = starting_fuels[task] total_waste = 0 trajectory = [] cumulative_reward = 0.0 for step in range(1, 6): remaining_steps = 5 - step state_capture = { "initial_fuel": initial_fuel, "remaining_steps": remaining_steps, "fuel_available": obs.fuel_available, "fuel_ratio": round(obs.fuel_available / initial_fuel, 3), "hospital_demand": obs.hospital_demand, "emergency_demand": obs.emergency_demand, "transport_demand": obs.transport_demand, "residential_demand": obs.residential_demand, "hospital_ratio": round(obs.hospital_demand / 40, 3), "emergency_ratio": round(obs.emergency_demand / 30, 3), "transport_ratio": round(obs.transport_demand / 20, 3), "residential_ratio": round(obs.residential_demand / 15, 3), "bottleneck": obs.transport_demand > 5, "step_fraction": round(step / 5.0, 1), "cumulative_reward": round(cumulative_reward, 4) } action, reasoning = decide_action(obs, randomness=noise_level) waste = ( max(0, action["fuel_to_hospital"] - obs.hospital_demand) + max(0, action["fuel_to_emergency"] - obs.emergency_demand) + max(0, action["fuel_to_transport"] - obs.transport_demand) + max(0, action["fuel_to_residential"] - obs.residential_demand) ) total_waste += waste try: obs = env.step(action) except: break cumulative_reward += obs.reward trajectory.append({ "step": step, "state": state_capture, "action": action, "reasoning": reasoning, "reward": round(obs.reward, 4), "instruction": build_llama_instruction(state_capture, action, reasoning) }) if len(trajectory) < 5: consecutive_failures += 1 continue episode_score = round(cumulative_reward / 5.0, 4) waste_ratio = total_waste / initial_fuel if episode_score >= 0.115 and waste_ratio <= 0.1: quality = "expert" if episode_score >= current_expert_threshold else "near_expert" if quality == "expert" and counts[task]["expert"] >= target_experts: consecutive_failures += 1 continue if quality == "near_expert" and counts[task]["near_expert"] >= target_nears: consecutive_failures += 1 continue for point in trajectory: point["state"]["outcome_score"] = episode_score record = { "task": task, "quality": quality, "score": episode_score, "waste": total_waste, "trajectory": trajectory } with open(OUTPUT_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(record) + "\n") counts[task][quality] += 1 print("Saved " + task + " " + quality + " - " + str(episode_score)) consecutive_failures = 0 else: consecutive_failures += 1 except Exception as e: print("Loop Error: " + str(e)) if __name__ == "__main__": run_expert_generation()