Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| import json | |
| import random | |
| import requests | |
| # Ensure we can import from parent directory | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from env.client import GlobalCrisisEnv | |
| from agent.planner import decide_action | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Number of accepted trajectories wanted for each task.
TARGET_PER_TASK = 60

# Intended share of "expert" vs. "near_expert" trajectories per task.
EXPERT_RATIO = 0.7
NEAR_EXPERT_RATIO = 0.3

# Presumably the largest tolerated ratio of wasted fuel to starting fuel for
# an episode to be kept -- TODO confirm against the generation loop.
WASTE_THRESHOLD = 0.1

# JSON-lines file that receives one record per accepted episode.
OUTPUT_PATH = "logs/expert_trajectories.jsonl"

# Task identifiers, processed in this order.
TASKS = ["easy", "medium", "hard"]

# Presumably the number of consecutive rejected episodes tolerated before the
# expert threshold is relaxed -- TODO confirm against the generation loop.
MAX_FAIL_SAFES = 200

# Initial per-task episode score at or above which an accepted episode is
# labelled "expert" rather than "near_expert".
TASK_THRESHOLDS = dict(easy=0.150, medium=0.175, hard=0.128)

# Presumably the lowest episode score that is still accepted at all --
# TODO confirm against the generation loop.
SCORE_MIN = 0.115

# System message placed in every generated instruction record.
SYSTEM_PROMPT = " ".join(
    (
        "You are a Geopolitical Crisis Logistics AI.",
        "Your goal is to stabilize hospital, emergency, and transport "
        "demands with minimal fuel waste.",
    )
)
def build_llama_instruction(state: dict, action: dict, reasoning: str) -> dict:
    """Package one decision step as a system/user/assistant instruction record.

    Args:
        state: JSON-serialisable snapshot of the environment state; it is
            embedded verbatim (via ``json.dumps``) in the user prompt.
        action: The allocation chosen for this step.
        reasoning: The explanation that accompanies the action.

    Returns:
        A dict with the keys ``"system"``, ``"user"`` and ``"assistant"``.
        The assistant value is a JSON string of an object holding
        ``"reasoning"`` and ``"action"``, in that order.
    """
    serialized_state = json.dumps(state)
    question = (
        "Given the current crisis state, allocate fuel optimally to "
        "maximize stability while minimizing waste.\n\n"
        f"State: {serialized_state}\n"
        "What is the optimal fuel allocation for this step?"
    )
    # Key order matters: it fixes the field order of the serialized reply.
    reply = json.dumps({"reasoning": reasoning, "action": action})
    return {"system": SYSTEM_PROMPT, "user": question, "assistant": reply}
# Number of environment steps played per episode.
_STEPS_PER_EPISODE = 5

# Reference demands used to normalise the per-sector demand ratios.
_INITIAL_DEMANDS = {"hospital": 40, "emergency": 30, "transport": 20, "residential": 15}

# Fuel each task is assumed to start with; this is NOT read back from the
# environment -- TODO confirm it matches the server-side task definitions.
_STARTING_FUELS = {"easy": 160, "medium": 120, "hard": 80}

# A transport demand above this value is flagged as a bottleneck in the state.
_BOTTLENECK_TRANSPORT_DEMAND = 5

# Randomness passed to the planner once the expert quota of a task is full.
_NEAR_EXPERT_NOISE = 0.1

# Each relaxation lowers the expert threshold by this step, clamped to the floor.
_EXPERT_THRESHOLD_FLOOR = 0.12
_EXPERT_THRESHOLD_RELAX_STEP = 0.005


def _allocation_waste(action, obs):
    """Return the fuel allocated beyond demand, summed over the four sectors."""
    return sum(
        max(0, action["fuel_to_" + sector] - getattr(obs, sector + "_demand"))
        for sector in ("hospital", "emergency", "transport", "residential")
    )


def _capture_state(obs, initial_fuel, step, cumulative_reward):
    """Build the state snapshot stored with a step (key order is significant).

    The snapshot is serialized into the instruction prompt, so the insertion
    order of the keys determines the emitted text and must not be changed.
    """
    return {
        "initial_fuel": initial_fuel,
        "remaining_steps": _STEPS_PER_EPISODE - step,
        "fuel_available": obs.fuel_available,
        "fuel_ratio": round(obs.fuel_available / initial_fuel, 3),
        "hospital_demand": obs.hospital_demand,
        "emergency_demand": obs.emergency_demand,
        "transport_demand": obs.transport_demand,
        "residential_demand": obs.residential_demand,
        "hospital_ratio": round(obs.hospital_demand / _INITIAL_DEMANDS["hospital"], 3),
        "emergency_ratio": round(obs.emergency_demand / _INITIAL_DEMANDS["emergency"], 3),
        "transport_ratio": round(obs.transport_demand / _INITIAL_DEMANDS["transport"], 3),
        "residential_ratio": round(obs.residential_demand / _INITIAL_DEMANDS["residential"], 3),
        "bottleneck": obs.transport_demand > _BOTTLENECK_TRANSPORT_DEMAND,
        "step_fraction": round(step / _STEPS_PER_EPISODE, 1),
        "cumulative_reward": round(cumulative_reward, 4),
    }


def _run_episode(env, obs, initial_fuel, noise_level):
    """Play one episode starting from the freshly reset observation ``obs``.

    Returns:
        ``(trajectory, cumulative_reward, total_waste)``. The trajectory holds
        fewer than ``_STEPS_PER_EPISODE`` entries when ``env.step`` failed.
    """
    trajectory = []
    cumulative_reward = 0.0
    total_waste = 0
    for step in range(1, _STEPS_PER_EPISODE + 1):
        # Snapshot and waste are both measured against the pre-step observation.
        state_capture = _capture_state(obs, initial_fuel, step, cumulative_reward)
        action, reasoning = decide_action(obs, randomness=noise_level)
        total_waste += _allocation_waste(action, obs)
        try:
            obs = env.step(action)
        except Exception as e:
            # Catch Exception only (not a bare except) so Ctrl-C and
            # SystemExit still stop the run; the short trajectory makes the
            # caller count this episode as a failure.
            print("Step Error: " + str(e))
            break
        cumulative_reward += obs.reward
        trajectory.append({
            "step": step,
            "state": state_capture,
            "action": action,
            "reasoning": reasoning,
            "reward": round(obs.reward, 4),
            "instruction": build_llama_instruction(state_capture, action, reasoning),
        })
    return trajectory, cumulative_reward, total_waste


def run_expert_generation():
    """Collect expert and near-expert trajectories for every task.

    For each task in ``TASKS`` episodes are played until ``TARGET_PER_TASK``
    of them have been accepted. An episode is accepted when its mean step
    reward is at least ``SCORE_MIN`` and its wasted-fuel ratio is at most
    ``WASTE_THRESHOLD``; it is labelled "expert" when the score reaches the
    task's current expert threshold and "near_expert" otherwise. Each accepted
    episode is appended to ``OUTPUT_PATH`` as one JSON line. The output file
    is truncated at start-up.

    The function returns early if the environment cannot be reset, and any
    other exception raised inside the generation loop is reported and ends
    the run.
    """
    output_dir = os.path.dirname(OUTPUT_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The near-expert quota is the remainder, so both quotas always add up to
    # TARGET_PER_TASK. Truncating each share independently could leave the sum
    # short of the target, and the collection loop would then never finish.
    target_experts = round(TARGET_PER_TASK * EXPERT_RATIO)
    quotas = {
        "expert": target_experts,
        "near_expert": TARGET_PER_TASK - target_experts,
    }
    counts = {t: {"expert": 0, "near_expert": 0} for t in TASKS}

    print("Phase 2A Re-Balancing Started")

    # Start from an empty file; records are appended one at a time below so
    # that everything collected so far survives an interrupted run.
    with open(OUTPUT_PATH, "w", encoding="utf-8"):
        pass

    try:
        with GlobalCrisisEnv() as env:
            for task in TASKS:
                task_counts = counts[task]
                initial_fuel = _STARTING_FUELS[task]
                expert_threshold = TASK_THRESHOLDS[task]
                consecutive_failures = 0

                # NOTE(review): nothing bounds this loop if the environment
                # never yields episodes for a quota that is still open.
                while task_counts["expert"] + task_counts["near_expert"] < TARGET_PER_TASK:
                    # Play deterministically until the expert quota is full,
                    # then add planner randomness for the remaining episodes.
                    if task_counts["expert"] < quotas["expert"]:
                        noise_level = 0.0
                    else:
                        noise_level = _NEAR_EXPERT_NOISE

                    if consecutive_failures >= MAX_FAIL_SAFES:
                        expert_threshold = max(
                            _EXPERT_THRESHOLD_FLOOR,
                            expert_threshold - _EXPERT_THRESHOLD_RELAX_STEP,
                        )
                        print("Relaxing threshold for " + task)
                        consecutive_failures = 0

                    try:
                        obs = env.reset(task_id=task)
                    except Exception as e:
                        print("Connection Error: " + str(e))
                        return

                    trajectory, cumulative_reward, total_waste = _run_episode(
                        env, obs, initial_fuel, noise_level
                    )
                    if len(trajectory) < _STEPS_PER_EPISODE:
                        consecutive_failures += 1
                        continue

                    episode_score = round(cumulative_reward / _STEPS_PER_EPISODE, 4)
                    waste_ratio = total_waste / initial_fuel
                    accepted = episode_score >= SCORE_MIN and waste_ratio <= WASTE_THRESHOLD
                    if not accepted:
                        consecutive_failures += 1
                        continue

                    quality = "expert" if episode_score >= expert_threshold else "near_expert"
                    if task_counts[quality] >= quotas[quality]:
                        # This label's quota is already full; discard the episode.
                        consecutive_failures += 1
                        continue

                    # Added after the instructions were built, so the score
                    # is stored in the state but not in the prompt text.
                    for point in trajectory:
                        point["state"]["outcome_score"] = episode_score

                    record = {
                        "task": task,
                        "quality": quality,
                        "score": episode_score,
                        "waste": total_waste,
                        "trajectory": trajectory,
                    }
                    with open(OUTPUT_PATH, "a", encoding="utf-8") as f:
                        f.write(json.dumps(record) + "\n")

                    task_counts[quality] += 1
                    print("Saved " + task + " " + quality + " - " + str(episode_score))
                    consecutive_failures = 0
    except Exception as e:
        print("Loop Error: " + str(e))


if __name__ == "__main__":
    run_expert_generation()