| """ |
| HR Onboarding/Offboarding Environment Implementation. |
| |
| An OpenEnv environment that simulates enterprise HR workflows. |
| The agent calls tools (hr_create_employee, it_assign_asset, etc.) |
| to complete onboarding/offboarding tasks. Reward is computed via rubrics. |
| """ |
|
|
| import json |
| import random |
| from typing import Any, Dict, List, Optional |
| from uuid import uuid4 |
|
|
| from openenv.core.env_server.interfaces import Environment |
| from openenv.core.env_server.types import State |
|
|
| from models import HROnboardingAction, HROnboardingObservation |
|
|
| try: |
| from .world import WorldState |
| from .tools import ToolRegistry, TOOL_DEFINITIONS |
| from .tasks import TaskGenerator |
| from .rubrics import RubricEvaluator |
| except ImportError: |
| from world import WorldState |
| from tools import ToolRegistry, TOOL_DEFINITIONS |
| from tasks import TaskGenerator |
| from rubrics import RubricEvaluator |
|
|
|
|
class HROnboardingEnvironment(Environment):
    """
    HR Onboarding/Offboarding environment.

    Simulates an enterprise HR system with 200+ employees, 8 departments,
    RBAC, approval chains, and IT provisioning. The agent calls one of 25
    tools per step to complete onboarding/offboarding tasks.

    An episode ends once ``max_steps`` tool calls have been made. Only the
    step that ends the episode carries the rubric score as its reward; every
    earlier step reports ``0.0``.

    Example:
        >>> env = HROnboardingEnvironment()
        >>> obs = env.reset()
        >>> print(obs.instruction)  # "Onboard Priya Sharma to Engineering..."
        >>>
        >>> obs = env.step(HROnboardingAction(
        ...     tool_name="hr_create_employee",
        ...     arguments={"name": "Priya Sharma", "department": "Engineering",
        ...                "level": "L2", "role": "Software Engineer"}
        ... ))
        >>> print(obs.tool_result)  # {"success": true, "employee": {...}}
        >>> print(obs.reward)  # 0.0 (intermediate) or 0.85 (final)
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, seed: int = 42, max_steps: int = 15):
        """Initialize the HR environment.

        Args:
            seed: Seed for this environment's ``random.Random`` instance and
                for the task generator.
            max_steps: Number of tool calls after which an episode is marked
                done and scored.
        """
        self._seed = seed
        self._max_steps = max_steps
        self._rng = random.Random(seed)

        # Simulated world, plus the tool registry bound to it and the
        # rubric evaluator used for scoring.
        self.world = WorldState()
        self.tool_registry = ToolRegistry(self.world)
        self.evaluator = RubricEvaluator()

        # Tasks are generated once up front; reset() cycles through them.
        self._task_gen = TaskGenerator(self.world, seed=seed)
        self._tasks = self._task_gen.generate_all_tasks()
        self._task_idx = 0
        self._current_task: Optional[Any] = None

        # Per-episode bookkeeping.
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._done = False
        self._tool_names = [t["name"] for t in TOOL_DEFINITIONS]

    def reset(self) -> HROnboardingObservation:
        """
        Reset the environment for a new episode.

        Picks the next task (round-robin over the generated task list),
        resets world state, runs the task's optional setup hook, and returns
        the initial observation with the task instruction and available tools.

        Returns:
            HROnboardingObservation for step 0 with ``done=False`` and
            ``reward=0.0``; task difficulty, category and context are placed
            in ``metadata``.

        Raises:
            RuntimeError: If the task generator produced no tasks, so there
                is nothing to run an episode on.
        """
        # Fail with a clear message, and before mutating any state, instead
        # of the ZeroDivisionError the modulo below would raise on an empty
        # task list.
        if not self._tasks:
            raise RuntimeError(
                "HROnboardingEnvironment has no tasks to run: "
                "generate_all_tasks() returned an empty collection"
            )

        self.world.reset()
        self._done = False

        # Round-robin task selection; the index wraps via the modulo.
        self._current_task = self._tasks[self._task_idx % len(self._tasks)]
        self._task_idx += 1

        # Apply task-specific world preconditions, when the task defines any.
        if self._current_task.setup_fn:
            self._current_task.setup_fn(self.world)

        # Every episode gets a fresh id and a zeroed step counter.
        self._state = State(episode_id=str(uuid4()), step_count=0)

        return HROnboardingObservation(
            task_id=self._current_task.task_id,
            instruction=self._current_task.instruction,
            tool_name="",
            tool_result={},
            step=0,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=False,
            reward=0.0,
            metadata={
                "difficulty": self._current_task.difficulty,
                "category": self._current_task.category,
                "context": self._current_task.context,
            },
        )

    def step(self, action: HROnboardingAction) -> HROnboardingObservation:
        """
        Execute one step: call the specified tool and return the result.

        Args:
            action: HROnboardingAction with tool_name and arguments.

        Returns:
            HROnboardingObservation with tool result, reward (on final step),
            and done flag. When the episode is already finished, no tool is
            executed and an error observation with ``done=True`` is returned.
        """
        task = self._current_task
        # step() may be called before reset(); fall back to empty strings
        # rather than failing on a missing task.
        task_id = task.task_id if task else ""

        if self._done:
            # Finished episodes are inert: the step counter is not advanced
            # and the world is not touched.
            return HROnboardingObservation(
                task_id=task_id,
                instruction="",
                tool_name=action.tool_name,
                tool_result={"error": "Episode already finished"},
                step=self._state.step_count,
                max_steps=self._max_steps,
                available_tools=self._tool_names,
                done=True,
                reward=0.0,
            )

        self._state.step_count += 1

        # Run the requested tool through the registry.
        result = self.tool_registry.execute(action.tool_name, action.arguments)

        # The step budget is the only termination condition.
        done = self._state.step_count >= self._max_steps
        self._done = done

        # Score only on the terminating step, from the world's action log.
        reward = 0.0
        eval_info = {}
        if done and task:
            eval_result = self.evaluator.evaluate(task, self.world.action_log)
            reward = eval_result["score"]
            eval_info = eval_result

        return HROnboardingObservation(
            task_id=task_id,
            instruction=task.instruction if task else "",
            tool_name=action.tool_name,
            tool_result=result,
            step=self._state.step_count,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=done,
            reward=reward,
            metadata={
                "step": self._state.step_count,
                # Attach the evaluation breakdown only when one was produced.
                **({"evaluation": eval_info} if eval_info else {}),
            },
        )

    @property
    def state(self) -> State:
        """Get the current environment state."""
        return self._state
|
|