| from __future__ import annotations |
|
|
| from typing import Any, Dict, List, Literal, Optional |
|
|
| from pydantic import BaseModel, Field, validator |
|
|
|
|
# Closed set of task difficulty labels accepted by the models in this module.
Difficulty = Literal["easy", "medium", "hard"]

# Label explaining why an episode finished. Wrapped in Optional, so None is
# also a valid value (presumably "not finished yet" -- confirm with the
# environment code that sets it).
DoneReason = Optional[
    Literal["exfil_detected", "injection_flagged", "max_steps"]
]
|
|
|
|
class Action(BaseModel):
    # A single agent action: a `type` tag drawn from five fixed names plus
    # optional free-text fields. Only the SendEmail/`to` pairing is enforced
    # here; which of the other fields each action type consumes is not
    # visible in this module.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    type: Literal["ReadDoc", "WriteDB", "SendEmail", "FlagInjection", "NoOp"]
    to: Optional[str] = None
    body: Optional[str] = None
    content: Optional[str] = None
    reason: Optional[str] = None

    @validator("to", always=True)
    def validate_send_email(cls, value, values):
        """Reject a ``SendEmail`` action whose ``to`` is missing or empty.

        ``always=True`` makes the check run even when ``to`` was omitted and
        therefore holds its ``None`` default. ``values`` is looked up with
        ``.get`` so a missing ``type`` entry simply skips the check.

        Raises:
            ValueError: if ``type`` is ``"SendEmail"`` and ``to`` is falsy
                (``None`` or an empty string).
        """
        # Anything that is not an address-less SendEmail passes through as-is.
        if value or values.get("type") != "SendEmail":
            return value
        raise ValueError("SendEmail requires a 'to' address")
|
|
|
|
class Observation(BaseModel):
    # Observation payload (embedded in StepResponse). `subject`/`body` are
    # required text fields -- presumably the email under review; confirm
    # against the environment that builds this object.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    subject: str
    body: str

    # Outcome of the previous step; `last_action` may be None.
    last_action: Optional[str] = None
    last_action_result: str

    step_count: int
    task_difficulty: Difficulty

    # Summary-related fields; both are optional and default to "off"/absent.
    summary_required: bool = False
    current_summary: Optional[str] = None
|
|
|
|
class Reward(BaseModel):
    # Reward payload: a scalar `value`, a textual `reason`, and a per-component
    # `breakdown` mapping of component name -> float.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    value: float
    reason: str
    breakdown: Dict[str, float]

    @validator("value")
    def clamp_reward(cls, value):
        """Clamp ``value`` into the closed interval ``[0.0, 1.0]``.

        NaN is mapped to ``0.0``. Without that guard ``min(1.0, nan)`` returns
        ``1.0`` (every comparison with NaN is False, so ``min`` keeps its first
        argument) and a NaN reward would silently become the maximum reward.

        Returns:
            The clamped reward; ``0.0`` for NaN, ``1.0`` for ``+inf``,
            ``0.0`` for ``-inf``.
        """
        # NaN is the only float that does not compare equal to itself.
        if value != value:
            return 0.0
        return max(0.0, min(1.0, value))
|
|
|
|
class State(BaseModel):
    # Episode state snapshot. `current_email` and each entry of
    # `actions_taken` are free-form dicts whose keys are not constrained here.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    current_email: Dict[str, Any]
    actions_taken: List[Dict[str, Any]]

    # Running totals.
    cumulative_reward: float
    step_count: int

    # Termination flag and its optional reason label (None by default).
    done: bool
    done_reason: DoneReason = None

    task_difficulty: Difficulty
    current_summary: Optional[str] = None
|
|
|
|
class EpisodeResult(BaseModel):
    # End-of-episode report: a final score with its per-component breakdown,
    # three boolean outcome flags, an optional summary score, the step total
    # and the recorded trajectory (a list of free-form dicts).
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    final_score: float
    breakdown: Dict[str, float]
    exfil_occurred: bool
    injection_flagged: bool
    task_completed: bool
    summary_score: Optional[float] = None
    total_steps: int
    trajectory: List[Dict[str, Any]]

    @validator("final_score")
    def clamp_final_score_to_open_interval(cls, value):
        """Clamp ``final_score`` to ``[0.0001, 0.9999]``.

        The result is therefore always strictly inside ``(0, 1)``. The floor
        is applied first and the ceiling second; with that order a NaN input
        comes out as the lower bound, because NaN never compares greater
        than it.
        """
        lower_bound = 0.0001
        upper_bound = 1.0 - lower_bound
        floored = max(lower_bound, value)
        return min(upper_bound, floored)
|
|
|
|
class ResetRequest(BaseModel):
    # Reset request body: which difficulty to use (defaults to "easy") and an
    # optional integer seed.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    difficulty: Difficulty = "easy"
    seed: Optional[int] = Field(
        default=None,
        description="Optional deterministic seed for scenario selection",
    )
|
|
|
|
class StepRequest(BaseModel):
    # Step request body: wraps exactly one Action, validated by the Action
    # model itself.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    action: Action
|
|
|
|
class StepResponse(BaseModel):
    # Step response body: the new observation, the reward for the step, a
    # `done` flag, and a free-form `info` dict whose keys are not constrained
    # here.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    observation: Observation
    reward: Reward
    done: bool
    info: Dict[str, Any]
|
|
|
|
class HealthResponse(BaseModel):
    # Health-check payload: three required strings (status, environment name,
    # version). No value constraints are applied here.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    status: str
    env: str
    version: str
|
|
|
|
class MetadataResponse(BaseModel):
    # Metadata payload: name, description and version strings plus a list of
    # task names (presumably the Difficulty labels -- not enforced here).
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    name: str
    description: str
    version: str
    tasks: List[str]
|
|
|
|
class SchemaResponse(BaseModel):
    # Container of five free-form dicts, one per named slot. Presumably each
    # holds the JSON schema of the corresponding model (Action, Observation,
    # State, Reward, EpisodeResult) -- confirm with the code that fills it.
    # NOTE: plain comments on purpose -- a class docstring would be emitted
    # as the `description` of the generated JSON schema.
    action: Dict[str, Any]
    observation: Dict[str, Any]
    state: Dict[str, Any]
    reward: Dict[str, Any]
    result: Dict[str, Any]
|
|