Spaces:

devxpy
/

rl_hack

Sleeping

App Files Files Community

rl_hack / server /hr_onboarding_environment.py

devxpy

Upload folder using huggingface_hub

e181764 verified 16 days ago

raw

history blame contribute delete

6 kB

	"""
	HR Onboarding/Offboarding Environment Implementation.

	An OpenEnv environment that simulates enterprise HR workflows.
	The agent calls tools (hr_create_employee, it_assign_asset, etc.)
	to complete onboarding/offboarding tasks. Reward is computed via rubrics.
	"""

	import json
	import random
	from typing import Any, Dict, List, Optional
	from uuid import uuid4

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	from models import HROnboardingAction, HROnboardingObservation

	try:
	from .world import WorldState
	from .tools import ToolRegistry, TOOL_DEFINITIONS
	from .tasks import TaskGenerator
	from .rubrics import RubricEvaluator
	except ImportError:
	from world import WorldState
	from tools import ToolRegistry, TOOL_DEFINITIONS
	from tasks import TaskGenerator
	from rubrics import RubricEvaluator


	class HROnboardingEnvironment(Environment):
	    """
	    Simulated enterprise HR system for onboarding/offboarding tasks.

	    The simulated organisation has 200+ employees across 8 departments, with
	    RBAC, approval chains, and IT provisioning. On each step the agent
	    invokes one of 25 tools. An episode ends once the step counter reaches
	    ``max_steps``; the rubric score is reported as the reward of that last
	    step, and every earlier step reports a reward of 0.0.

	    Example:
	        >>> env = HROnboardingEnvironment()
	        >>> obs = env.reset()
	        >>> print(obs.instruction)  # "Onboard Priya Sharma to Engineering..."
	        >>>
	        >>> obs = env.step(HROnboardingAction(
	        ...     tool_name="hr_create_employee",
	        ...     arguments={"name": "Priya Sharma", "department": "Engineering",
	        ...                "level": "L2", "role": "Software Engineer"}
	        ... ))
	        >>> print(obs.tool_result)  # {"success": true, "employee": {...}}
	        >>> print(obs.reward)  # 0.0 (intermediate) or 0.85 (final)
	    """

	    SUPPORTS_CONCURRENT_SESSIONS: bool = True

	    def __init__(self, seed: int = 42, max_steps: int = 15):
	        """
	        Build the world, the tool registry, the evaluator and the task list.

	        Args:
	            seed: Seed stored on the instance, used for the private RNG and
	                forwarded to the task generator.
	            max_steps: Step count at which an episode is marked finished.
	        """
	        self._seed = seed
	        self._max_steps = max_steps
	        self._rng = random.Random(seed)

	        # The tool registry operates on the same world object that is reset
	        # at the start of every episode.
	        self.world = WorldState()
	        self.tool_registry = ToolRegistry(self.world)
	        self.evaluator = RubricEvaluator()

	        # All tasks are generated once up front; reset() walks through them.
	        self._task_gen = TaskGenerator(self.world, seed=seed)
	        self._tasks = self._task_gen.generate_all_tasks()
	        self._task_idx = 0
	        self._current_task = None

	        # Per-episode bookkeeping.
	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        self._done = False
	        self._tool_names = [definition["name"] for definition in TOOL_DEFINITIONS]

	    def reset(self) -> HROnboardingObservation:
	        """
	        Begin a fresh episode and return its initial observation.

	        Resets the world, selects the next task from the pre-generated list
	        (wrapping around at the end), runs the task's setup hook when it has
	        one, and starts a new ``State`` with a new episode id.

	        Returns:
	            HROnboardingObservation at step 0 carrying the task instruction,
	            the available tool names, and the task's difficulty, category
	            and context in ``metadata``.
	        """
	        self.world.reset()
	        self._done = False

	        # Round-robin over the task list; the index itself keeps growing.
	        position = self._task_idx % len(self._tasks)
	        task = self._tasks[position]
	        self._current_task = task
	        self._task_idx += 1

	        # Optional per-task preparation of the freshly reset world.
	        if task.setup_fn:
	            task.setup_fn(self.world)

	        self._state = State(episode_id=str(uuid4()), step_count=0)

	        task_details = {
	            "difficulty": task.difficulty,
	            "category": task.category,
	            "context": task.context,
	        }
	        return HROnboardingObservation(
	            task_id=task.task_id,
	            instruction=task.instruction,
	            tool_name="",
	            tool_result={},
	            step=0,
	            max_steps=self._max_steps,
	            available_tools=self._tool_names,
	            done=False,
	            reward=0.0,
	            metadata=task_details,
	        )

	    def step(self, action: HROnboardingAction) -> HROnboardingObservation:  # type: ignore[override]
	        """
	        Run the requested tool once and report the outcome.

	        Args:
	            action: HROnboardingAction naming the tool and its arguments.

	        Returns:
	            HROnboardingObservation holding the tool result and the done
	            flag. The reward is 0.0 until the step that reaches
	            ``max_steps``; on that step, if a task is active, the reward is
	            the evaluator's ``"score"`` and the evaluation result is added
	            to ``metadata`` under ``"evaluation"``. Calling this after the
	            episode has finished returns an error observation and executes
	            no tool.
	        """
	        task = self._current_task

	        # Finished episodes short-circuit: nothing is executed or counted.
	        if self._done:
	            return HROnboardingObservation(
	                task_id=task.task_id if task else "",
	                instruction="",
	                tool_name=action.tool_name,
	                tool_result={"error": "Episode already finished"},
	                step=self._state.step_count,
	                max_steps=self._max_steps,
	                available_tools=self._tool_names,
	                done=True,
	                reward=0.0,
	            )

	        self._state.step_count += 1

	        # Dispatch the tool call through the registry.
	        outcome = self.tool_registry.execute(action.tool_name, action.arguments)

	        # The episode ends purely on the step budget.
	        finished = self._state.step_count >= self._max_steps
	        self._done = finished

	        # Score only once, on the terminal step, and only if a task is active.
	        score = 0.0
	        evaluation = {}
	        if finished and task:
	            evaluation = self.evaluator.evaluate(task, self.world.action_log)
	            score = evaluation["score"]

	        step_metadata = {"step": self._state.step_count}
	        if evaluation:
	            step_metadata["evaluation"] = evaluation

	        return HROnboardingObservation(
	            task_id=task.task_id if task else "",
	            instruction=task.instruction if task else "",
	            tool_name=action.tool_name,
	            tool_result=outcome,
	            step=self._state.step_count,
	            max_steps=self._max_steps,
	            available_tools=self._tool_names,
	            done=finished,
	            reward=score,
	            metadata=step_metadata,
	        )

	    @property
	    def state(self) -> State:
	        """Return the ``State`` object tracking the current episode."""
	        return self._state