Spaces:

lvvignesh2122
/

Adaptive-AI-System

Sleeping

App Files Files Community

Adaptive-AI-System / runner /generate_expert_data.py

lvvignesh2122

Phase 2A Complete: Balanced Elite Dataset (180 episodes, 70% Expert ratio)

54074b6 about 2 months ago

raw

history blame contribute delete

7.35 kB

	import sys
	import os
	import json
	import random
	import requests

	# Ensure we can import from parent directory
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from env.client import GlobalCrisisEnv
	from agent.planner import decide_action

	# Configuration
	TARGET_PER_TASK = 60
	EXPERT_RATIO = 0.7
	NEAR_EXPERT_RATIO = 0.3
	WASTE_THRESHOLD = 0.1
	OUTPUT_PATH = "logs/expert_trajectories.jsonl"
	TASKS = ["easy", "medium", "hard"]
	MAX_FAIL_SAFES = 200

	# Task-specific scoring ceilings
	TASK_THRESHOLDS = {
	"easy": 0.150,
	"medium": 0.175,
	"hard": 0.128
	}

	SCORE_MIN = 0.115

	SYSTEM_PROMPT = (
	"You are a Geopolitical Crisis Logistics AI. Your goal is to stabilize "
	"hospital, emergency, and transport demands with minimal fuel waste."
	)

	def build_llama_instruction(state: dict, action: dict, reasoning: str) -> dict:
	user_prompt = (
	"Given the current crisis state, allocate fuel optimally to "
	"maximize stability while minimizing waste.\n\n"
	"State: " + json.dumps(state) + "\n"
	"What is the optimal fuel allocation for this step?"
	)
	assistant_response = {
	"reasoning": reasoning,
	"action": action
	}
	return {
	"system": SYSTEM_PROMPT,
	"user": user_prompt,
	"assistant": json.dumps(assistant_response)
	}

	def run_expert_generation():
	if not os.path.exists("logs"):
	os.makedirs("logs", exist_ok=True)

	counts = {t: {"expert": 0, "near_expert": 0} for t in TASKS}
	initial_demands = {"hospital": 40, "emergency": 30, "transport": 20, "residential": 15}
	starting_fuels = {"easy": 160, "medium": 120, "hard": 80}

	print("Phase 2A Re-Balancing Started")

	with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
	pass

	try:
	with GlobalCrisisEnv() as env:
	for task in TASKS:
	consecutive_failures = 0
	current_expert_threshold = TASK_THRESHOLDS[task]
	target_experts = int(60 * 0.7)
	target_nears = int(60 * 0.3)

	while (counts[task]["expert"] + counts[task]["near_expert"]) < 60:
	if counts[task]["expert"] < target_experts:
	noise_level = 0.0
	else:
	noise_level = 0.1

	if consecutive_failures >= 200:
	current_expert_threshold = max(0.12, current_expert_threshold - 0.005)
	print("Relaxing threshold for " + task)
	consecutive_failures = 0

	try:
	obs = env.reset(task_id=task)
	except Exception as e:
	print("Connection Error: " + str(e))
	return

	initial_fuel = starting_fuels[task]
	total_waste = 0
	trajectory = []
	cumulative_reward = 0.0

	for step in range(1, 6):
	remaining_steps = 5 - step

	state_capture = {
	"initial_fuel": initial_fuel,
	"remaining_steps": remaining_steps,
	"fuel_available": obs.fuel_available,
	"fuel_ratio": round(obs.fuel_available / initial_fuel, 3),
	"hospital_demand": obs.hospital_demand,
	"emergency_demand": obs.emergency_demand,
	"transport_demand": obs.transport_demand,
	"residential_demand": obs.residential_demand,
	"hospital_ratio": round(obs.hospital_demand / 40, 3),
	"emergency_ratio": round(obs.emergency_demand / 30, 3),
	"transport_ratio": round(obs.transport_demand / 20, 3),
	"residential_ratio": round(obs.residential_demand / 15, 3),
	"bottleneck": obs.transport_demand > 5,
	"step_fraction": round(step / 5.0, 1),
	"cumulative_reward": round(cumulative_reward, 4)
	}

	action, reasoning = decide_action(obs, randomness=noise_level)

	waste = (
	max(0, action["fuel_to_hospital"] - obs.hospital_demand) +
	max(0, action["fuel_to_emergency"] - obs.emergency_demand) +
	max(0, action["fuel_to_transport"] - obs.transport_demand) +
	max(0, action["fuel_to_residential"] - obs.residential_demand)
	)
	total_waste += waste

	try:
	obs = env.step(action)
	except:
	break

	cumulative_reward += obs.reward
	trajectory.append({
	"step": step,
	"state": state_capture,
	"action": action,
	"reasoning": reasoning,
	"reward": round(obs.reward, 4),
	"instruction": build_llama_instruction(state_capture, action, reasoning)
	})

	if len(trajectory) < 5:
	consecutive_failures += 1
	continue

	episode_score = round(cumulative_reward / 5.0, 4)
	waste_ratio = total_waste / initial_fuel

	if episode_score >= 0.115 and waste_ratio <= 0.1:
	quality = "expert" if episode_score >= current_expert_threshold else "near_expert"

	if quality == "expert" and counts[task]["expert"] >= target_experts:
	consecutive_failures += 1
	continue
	if quality == "near_expert" and counts[task]["near_expert"] >= target_nears:
	consecutive_failures += 1
	continue

	for point in trajectory:
	point["state"]["outcome_score"] = episode_score

	record = {
	"task": task,
	"quality": quality,
	"score": episode_score,
	"waste": total_waste,
	"trajectory": trajectory
	}

	with open(OUTPUT_PATH, "a", encoding="utf-8") as f:
	f.write(json.dumps(record) + "\n")

	counts[task][quality] += 1
	print("Saved " + task + " " + quality + " - " + str(episode_score))
	consecutive_failures = 0
	else:
	consecutive_failures += 1

	except Exception as e:
	print("Loop Error: " + str(e))

	if __name__ == "__main__":
	run_expert_generation()