Spaces:
Paused
Paused
| from __future__ import annotations | |
| from typing import Any, Dict, List, Optional | |
| from pydantic import BaseModel, Field | |
| # --------------------------------------------------------------------------- | |
| # Action space | |
| # --------------------------------------------------------------------------- | |
| class PayOpsAction(BaseModel): | |
| """ | |
| Action submitted by the agent for a single transaction. | |
| action_type choices | |
| ------------------- | |
| approve – mark transaction as legitimate and allow it through | |
| reject – block the transaction outright | |
| flag – mark for manual review with a soft hold | |
| escalate – route to senior compliance officer / fraud team | |
| inspect – pull additional signals (logs, KYC data, velocity) | |
| hold – temporary hold pending more information | |
| request_docs – ask sender for supporting documents (e.g. invoice, contract) | |
| verify_kyc – trigger an active KYC re-verification check | |
| contact_sender – contact the sender directly to confirm intent | |
| file_sar – file a Suspicious Activity Report to regulator | |
| """ | |
| action_type: str = Field( | |
| ..., | |
| description=( | |
| "One of: approve | reject | flag | escalate | inspect | hold " | |
| "| request_docs | verify_kyc | contact_sender | file_sar" | |
| ), | |
| ) | |
| transaction_id: str = Field(..., description="ID of the transaction being acted on") | |
| reason: Optional[str] = Field( | |
| default=None, description="Free-text rationale from the agent" | |
| ) | |
| confidence: Optional[float] = Field( | |
| default=None, | |
| ge=0.0, | |
| le=1.0, | |
| description="Agent self-reported confidence [0, 1]. Used in reward shaping.", | |
| ) | |
| metadata: Optional[Dict[str, Any]] = Field( | |
| default=None, | |
| description="Optional pass-through metadata (openenv.core.Action compatibility).", | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Observation space | |
| # --------------------------------------------------------------------------- | |
| class PayOpsObservation(BaseModel): | |
| """ | |
| Structured observation returned after each step (and on reset). | |
| Describes the current transaction visible to the agent. | |
| """ | |
| # --- transaction identity --- | |
| transaction_id: str | |
| amount: float = Field(..., description="Transaction amount in the stated currency") | |
| currency: str = Field(..., description="ISO-4217 currency code, e.g. USD, EUR") | |
| sender: str = Field(..., description="Sender identifier (email / account / alias)") | |
| receiver: str = Field(..., description="Receiver identifier") | |
| transaction_type: str = Field( | |
| default="transfer", | |
| description="Type: transfer | payment | withdrawal | refund | internal | loan_repayment | payroll", | |
| ) | |
| # --- risk signals --- | |
| status: str = Field( | |
| default="pending", | |
| description=( | |
| "Current status: pending | approved | rejected | flagged | escalated " | |
| "| held | inspected | docs_requested | kyc_triggered | sender_contacted | sar_filed" | |
| ), | |
| ) | |
| risk_score: float = Field( | |
| ..., ge=0.0, le=1.0, description="Composite ML risk score [0=low, 1=high]" | |
| ) | |
| ml_confidence: float = Field( | |
| default=0.9, | |
| ge=0.0, | |
| le=1.0, | |
| description="Model's self-reported confidence in its own risk_score. Low = possibly poisoned.", | |
| ) | |
| flags: List[str] = Field( | |
| default_factory=list, | |
| description="Active risk flags e.g. high_value, unknown_sender, velocity_breach", | |
| ) | |
| # --- sender behaviour signals --- | |
| velocity_1h: Optional[int] = Field( | |
| default=None, | |
| description="Number of transactions from this sender in the past hour", | |
| ) | |
| velocity_24h: Optional[int] = Field( | |
| default=None, | |
| description="Number of transactions from this sender in the past 24 hours", | |
| ) | |
| avg_transaction_amount: Optional[float] = Field( | |
| default=None, | |
| description="Sender's historical average transaction amount", | |
| ) | |
| account_age_days: Optional[int] = Field( | |
| default=None, | |
| description="Age of the sender account in days", | |
| ) | |
| # --- counterparty / geography --- | |
| country_risk: Optional[str] = Field( | |
| default=None, | |
| description="Receiver country risk tier: low | medium | high | sanctioned", | |
| ) | |
| kyc_status: Optional[str] = Field( | |
| default=None, | |
| description="KYC verification status: verified | pending | failed | none | expired", | |
| ) | |
| kyc_expiry_days: Optional[int] = Field( | |
| default=None, | |
| description="Days until KYC expires (negative = already expired)", | |
| ) | |
| previous_violations: Optional[int] = Field( | |
| default=None, | |
| description="Number of prior compliance violations for this sender", | |
| ) | |
| previous_sars: Optional[int] = Field( | |
| default=None, | |
| description="Number of Suspicious Activity Reports previously filed for this sender", | |
| ) | |
| counterparty_risk: Optional[str] = Field( | |
| default=None, | |
| description="Known risk profile of the receiver: clean | unknown | watchlist | blacklist", | |
| ) | |
| # --- chain context (multi-hop investigation) --- | |
| chain_step: int = Field( | |
| default=1, | |
| description="Which step within a multi-hop investigation chain (1=initial presentation)", | |
| ) | |
| chain_total: int = Field( | |
| default=1, | |
| description="Total number of chained investigation steps for this task", | |
| ) | |
| chain_context: Optional[str] = Field( | |
| default=None, | |
| description="Summary of findings from earlier chain steps", | |
| ) | |
| # --- resource tracking --- | |
| steps_remaining: Optional[int] = Field( | |
| default=None, | |
| description="How many investigation sub-steps remain before a terminal decision is required", | |
| ) | |
| action_cost: float = Field( | |
| default=0.0, | |
| description="Operational cost penalty incurred by the last action", | |
| ) | |
| budget_remaining: float = Field( | |
| default=5.0, | |
| description="Remaining investigation budget (starts at 5.0; each investigation action deducts its cost)", | |
| ) | |
| # --- context from prior investigation actions --- | |
| inspection_notes: Optional[str] = Field( | |
| default=None, | |
| description="Additional details revealed after an 'inspect' action", | |
| ) | |
| docs_notes: Optional[str] = Field( | |
| default=None, | |
| description="Document review findings after a 'request_docs' action", | |
| ) | |
| kyc_notes: Optional[str] = Field( | |
| default=None, | |
| description="KYC re-verification outcome after a 'verify_kyc' action", | |
| ) | |
| contact_notes: Optional[str] = Field( | |
| default=None, | |
| description="Outcome of contacting the sender via 'contact_sender' action", | |
| ) | |
| # --- recommended investigation sub-actions for this task --- | |
| investigation_hints: List[str] = Field( | |
| default_factory=list, | |
| description=( | |
| "Sub-actions recommended for this task (non-exhaustive). " | |
| "Using them before the terminal decision earns bonus reward and may reveal " | |
| "decisive evidence. Empty list = no specific investigation required." | |
| ), | |
| ) | |
| # --- recent decision context (last 3 decisions in this episode) --- | |
| recent_decisions: List[Dict[str, Any]] = Field( | |
| default_factory=list, | |
| description="Last up to 3 completed decisions in this episode for pattern context", | |
| ) | |
| # --- episode bookkeeping --- | |
| task_id: str = Field(default="", description="Identifier of the active task") | |
| task_difficulty: str = Field( | |
| default="easy", description="Difficulty tier: easy | medium | hard | critical" | |
| ) | |
| step_in_episode: int = Field( | |
| default=0, description="How many steps have elapsed in this episode" | |
| ) | |
| reward: float = Field(default=0.0, description="Reward from the last action") | |
| reward_breakdown: Dict[str, float] = Field( | |
| default_factory=dict, | |
| description="Itemised reward components: base, time_penalty, confidence_bonus, cost_penalty", | |
| ) | |
| cumulative_reward: float = Field( | |
| default=0.0, description="Total reward accumulated so far in this episode" | |
| ) | |
| done: bool = Field(default=False, description="Whether the episode has ended") | |
| network_graph: Optional[Dict[str, Any]] = Field( | |
| default=None, | |
| description="Mule-chain / correspondent-bank relationship graph for tasks where present", | |
| ) | |
| info: Dict[str, Any] = Field( | |
| default_factory=dict, | |
| description="Extra diagnostic information (action taken, correct action, etc.)", | |
| ) | |
| metadata: Optional[Dict[str, Any]] = Field( | |
| default=None, | |
| description="Optional pass-through metadata (openenv.core.Observation compatibility).", | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Reward breakdown (typed model for openenv.core spec compliance) | |
| # --------------------------------------------------------------------------- | |
| class PayOpsReward(BaseModel): | |
| """ | |
| Typed reward model returned alongside each observation. | |
| ``value`` is the normalised reward \u2208 [0.0, 1.0] for the episode so far. | |
| ``breakdown`` itemises the components that contributed to the raw score. | |
| """ | |
| value: float = Field( | |
| default=0.0, | |
| ge=0.0, | |
| le=1.0, | |
| description="Normalised episode reward \u2208 [0.0, 1.0]", | |
| ) | |
| breakdown: Dict[str, float] = Field( | |
| default_factory=dict, | |
| description=( | |
| "Per-component reward breakdown: terminal_correct, investigation_bonus, " | |
| "flag_identification_bonus, confidence_bonus, duplicate_penalty, budget_penalty" | |
| ), | |
| ) | |
| raw_total: float = Field( | |
| default=0.0, | |
| description="Raw (un-normalised) sum of reward components before clamping", | |
| ) | |
| max_possible: float = Field( | |
| default=1.0, | |
| description="Maximum achievable raw reward for this episode", | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Internal state (used by the server's /state endpoint) | |
| # --------------------------------------------------------------------------- | |
| class PayOpsState(BaseModel): | |
| episode_id: Optional[str] = None | |
| step_count: int = 0 | |
| current_task_id: str = "" | |
| transactions_processed: int = 0 | |
| total_tasks: int = 0 | |
| cumulative_reward: float = 0.0 | |
| budget_spent: float = Field(default=0.0, description="Total action costs accumulated") | |
| budget_limit: float = Field(default=5.0, description="Max investigation budget per episode") | |
| actions_taken: List[str] = Field(default_factory=list) | |
| last_action: Optional[str] = None | |
| investigation_actions_used: List[str] = Field( | |
| default_factory=list, | |
| description="All investigation sub-actions used this episode (inspect, request_docs, etc.)", | |
| ) | |
| correct_decisions: int = Field(default=0, description="Terminal decisions that matched ground truth") | |
| wrong_high_cost: int = Field( | |
| default=0, description="Count of approve-on-fraud type mistakes" | |
| ) | |
| recent_decisions: List[Dict[str, Any]] = Field( | |
| default_factory=list, | |
| description="Recent completed task outcomes for analytics", | |
| ) | |
| done: bool = False | |
| episode_seed: Optional[int] = Field( | |
| default=None, | |
| description="Random seed used to jitter task parameters this episode (for reproducibility)", | |
| ) | |