Spaces:
Sleeping
Sleeping
| """Pydantic models for the OpenCode OpenEnv environment.""" | |
| from __future__ import annotations | |
| from typing import Any | |
| from openenv.core.env_server.types import State | |
| from pydantic import BaseModel, Field | |
class RolloutTurn(BaseModel):
    """A single LLM turn intercepted by the in-sandbox proxy (Mode B).

    ``turn`` is the only required field; every other field has a default.
    """

    # Turn number for this record (required, no default).
    turn: int

    # Request and response payloads for the turn. Each defaults to a fresh
    # empty dict via ``default_factory``.
    # NOTE(review): presumably the JSON bodies seen by the proxy -- confirm.
    request: dict[str, Any] = Field(default_factory=dict)
    response: dict[str, Any] = Field(default_factory=dict)

    # Token-level completion data. Each defaults to a fresh empty list.
    # NOTE(review): the three lists look like they are meant to be
    # index-aligned (token text / token id / log-prob) -- confirm with the
    # code that populates them; nothing here enforces equal lengths.
    completion_tokens: list[str] = Field(default_factory=list)
    completion_token_ids: list[int] = Field(default_factory=list)
    per_token_logps: list[float] = Field(default_factory=list)

    # ``None`` when no finish reason is set.
    finish_reason: str | None = None

    # Timing fields, both defaulting to 0.0.
    # NOTE(review): the ``_s`` suffix suggests seconds; the time base of
    # ``timestamp`` is not visible from this file -- confirm.
    latency_s: float = 0.0
    timestamp: float = 0.0
class RolloutResult(BaseModel):
    """Result of a single invocation of the ``run_rollout`` tool.

    The model is serialized to JSON and returned as the tool result. On the
    training side, the client deserializes it and passes ``proxy_turns``
    together with ``reward`` into GRPO.

    Every field has a default, so an instance can be constructed with no
    arguments.
    """

    # --- Identifiers -------------------------------------------------------
    task_id: str = ""
    sandbox_id: str = ""

    # --- Scalars -----------------------------------------------------------
    # ``reward`` is ``None`` by default, i.e. until a reward is set.
    reward: float | None = None
    exit_code: int = 0
    # NOTE(review): ``wall_s`` is presumably wall-clock seconds -- confirm.
    wall_s: float = 0.0
    mode: str = "transparent_proxy"

    # --- Per-turn trajectory -----------------------------------------------
    # Left empty when running in black_box mode.
    proxy_turns: list[RolloutTurn] = Field(default_factory=list)

    # --- Agent artifacts ---------------------------------------------------
    # NOTE(review): ``workdir_files`` is presumably path -> file contents;
    # verify against the producer.
    workdir_files: dict[str, str] = Field(default_factory=dict)
    agent_log_tail: str = ""

    # --- Verifier bookkeeping ----------------------------------------------
    verifier_stdout: str = ""
    verifier_stderr: str = ""
    test_exit_code: int | None = None

    # --- Errors ------------------------------------------------------------
    # Error, if any, raised along the sandbox / proxy / verifier path.
    error: str | None = None

    # --- Diagnostic tails --------------------------------------------------
    # Filled in when the primitive or the verifier misbehaves, so the client
    # can see what happened inside the sandbox without a second round-trip.
    # Each tail is truncated to a few KB.
    proxy_log_tail: str = ""
    install_log_tail: str = ""
class OpenCodeState(State):
    """Environment state that persists across calls to one environment instance.

    The server class sets ``SUPPORTS_CONCURRENT_SESSIONS = True``, so every
    HTTP session is given its own OpenCodeEnvironment; this state is
    therefore scoped to a single session.
    """

    # Rollout counter; starts at 0.
    rollouts_completed: int = 0

    # Details of the latest rollout. Each defaults to ``None``.
    # NOTE(review): presumably updated after every ``run_rollout`` call --
    # confirm against the environment implementation.
    last_reward: float | None = None
    last_task_id: str | None = None
    last_sandbox_id: str | None = None