# Uploaded by AdithyaSK (HF Staff) using huggingface_hub.
# Commit 9e98d70 (verified).
"""Pydantic models for the OpenCode OpenEnv environment."""
from __future__ import annotations
from typing import Any
from openenv.core.env_server.types import State
from pydantic import BaseModel, Field
class RolloutTurn(BaseModel):
    """A single LLM turn intercepted and recorded by the in-sandbox proxy (Mode B).

    Only ``turn`` is required; every other field has a default, so a
    partially populated record can still be constructed.
    """

    # Turn number within the rollout.
    # NOTE(review): whether numbering starts at 0 or 1 is not visible here.
    turn: int

    # Request and response payloads for this turn. The exact schema is
    # decided by the proxy and is not visible from this module.
    request: dict[str, Any] = Field(default_factory=dict)
    response: dict[str, Any] = Field(default_factory=dict)

    # Per-token completion data: token strings, token ids and log-probs.
    # Presumably the three lists are index-aligned — TODO confirm against
    # the proxy that fills them in.
    completion_tokens: list[str] = Field(default_factory=list)
    completion_token_ids: list[int] = Field(default_factory=list)
    per_token_logps: list[float] = Field(default_factory=list)

    # Why generation stopped; ``None`` when not recorded.
    finish_reason: str | None = None

    # Timing information. Presumably ``latency_s`` is in seconds (per the
    # ``_s`` suffix) and ``timestamp`` is an epoch time — verify with the
    # producer.
    latency_s: float = 0.0
    timestamp: float = 0.0
class RolloutResult(BaseModel):
    """Result payload produced by a single ``run_rollout`` tool invocation.

    The model is emitted as JSON in the tool result. On the training side
    the client parses it back and passes ``proxy_turns`` together with
    ``reward`` to GRPO.
    """

    # --- Identifiers ------------------------------------------------------
    task_id: str = ""
    sandbox_id: str = ""

    # --- Scalar outcome ---------------------------------------------------
    # ``reward`` defaults to ``None`` rather than a number.
    reward: float | None = None
    exit_code: int = 0
    # Presumably wall-clock duration in seconds (``_s`` suffix) — TODO confirm.
    wall_s: float = 0.0
    mode: str = "transparent_proxy"

    # --- Trajectory -------------------------------------------------------
    # One entry per intercepted turn; stays empty in black_box mode.
    proxy_turns: list[RolloutTurn] = Field(default_factory=list)

    # --- Agent artifacts --------------------------------------------------
    # Presumably maps a file path in the workdir to its text content —
    # verify against the code that fills it in.
    workdir_files: dict[str, str] = Field(default_factory=dict)
    agent_log_tail: str = ""

    # --- Verifier bookkeeping ---------------------------------------------
    verifier_stdout: str = ""
    verifier_stderr: str = ""
    test_exit_code: int | None = None

    # --- Errors -----------------------------------------------------------
    # Set when something on the sandbox / proxy / verifier path failed;
    # ``None`` otherwise.
    error: str | None = None

    # --- Diagnostic tails -------------------------------------------------
    # Filled in when the primitive or the verifier misbehaves, so the client
    # can see WHAT happened inside the sandbox without a second round-trip.
    # Each one is truncated to a few KB.
    proxy_log_tail: str = ""
    install_log_tail: str = ""
class OpenCodeState(State):
    """Environment state that persists across calls on one environment instance.

    The server class sets ``SUPPORTS_CONCURRENT_SESSIONS = True``, so every
    HTTP session is given its own OpenCodeEnvironment; as a result this
    state is scoped to a single session.
    """

    # Counter of completed rollouts; starts at zero.
    rollouts_completed: int = 0

    # Details of the most recent rollout. All three default to ``None``.
    last_reward: float | None = None
    last_task_id: str | None = None
    last_sandbox_id: str | None = None