"""Pydantic models for the OpenCode OpenEnv environment.""" from __future__ import annotations from typing import Any from openenv.core.env_server.types import State from pydantic import BaseModel, Field class RolloutTurn(BaseModel): """One intercepted LLM turn captured by the in-sandbox proxy (Mode B).""" turn: int request: dict[str, Any] = Field(default_factory=dict) response: dict[str, Any] = Field(default_factory=dict) completion_tokens: list[str] = Field(default_factory=list) completion_token_ids: list[int] = Field(default_factory=list) per_token_logps: list[float] = Field(default_factory=list) finish_reason: str | None = None latency_s: float = 0.0 timestamp: float = 0.0 class RolloutResult(BaseModel): """Outcome of one call to the ``run_rollout`` tool. Serialized to JSON as the tool result. The training-side client deserializes and feeds ``proxy_turns`` + ``reward`` into GRPO. """ # Identifiers task_id: str = "" sandbox_id: str = "" # Scalars reward: float | None = None exit_code: int = 0 wall_s: float = 0.0 mode: str = "transparent_proxy" # Per-turn trajectory (empty in black_box mode) proxy_turns: list[RolloutTurn] = Field(default_factory=list) # Agent artifacts workdir_files: dict[str, str] = Field(default_factory=dict) agent_log_tail: str = "" # Verifier bookkeeping verifier_stdout: str = "" verifier_stderr: str = "" test_exit_code: int | None = None # Errors (if any) surfacing from sandbox/proxy/verifier path error: str | None = None # Diagnostic tails — populated when the primitive or verifier misbehaves so # the client can see WHAT happened inside the sandbox without a second # round-trip. Each is truncated to a few KB. proxy_log_tail: str = "" install_log_tail: str = "" class OpenCodeState(State): """Persistent env state across calls to the single environment instance. Each HTTP session gets its own OpenCodeEnvironment (via ``SUPPORTS_CONCURRENT_SESSIONS = True`` on the server class), so this state is per-session. """ rollouts_completed: int = 0 last_reward: float | None = None last_task_id: str | None = None last_sandbox_id: str | None = None