# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Client for the deployed opencode_env server.
The server exposes a single MCP tool ``run_rollout`` that runs one OpenCode
rollout in an E2B sandbox and returns a JSON-serialized :class:`RolloutResult`.
Example::
import os
from opencode_env import OpenCodeEnv
with OpenCodeEnv(base_url="https://adithya-sk-opencode-env.hf.space") as env:
env.reset()
result = env.run_rollout(
base_url="https://api.openai.com/v1",
api_key=os.environ["OPENAI_API_KEY"],
model="gpt-4o-mini",
instruction="Create binary_search.py exposing def binary_search(arr, target) -> int...",
setup=[],
verify=["python /home/user/test.py"],
task_id="binary_search_v1",
)
print(result.reward, len(result.proxy_turns))
"""
from __future__ import annotations
import json
from typing import Any
from openenv.core.mcp_client import MCPToolClient
try:
from .models import RolloutResult
except ImportError: # pragma: no cover
from models import RolloutResult # type: ignore
class OpenCodeEnv(MCPToolClient):
    """Typed client for the opencode_env MCP server.

    All transport plumbing — ``reset`` / ``call_tool`` / ``list_tools`` /
    ``from_docker_image`` and the context-manager protocol — comes from
    :class:`MCPToolClient`; this subclass only adds a typed wrapper around
    the server's single ``run_rollout`` tool.
    """

    def run_rollout(
        self,
        *,
        # Endpoint — pass either the shorthand selector OR explicit fields.
        endpoint: str = "",  # "vllm" | "openai" | "hf_router"
        base_url: str = "",
        api_key: str = "",
        model: str = "",
        # Task — the "list of bash commands" shape
        instruction: str,
        setup: list[str] | None = None,
        verify: list[str] | None = None,
        # Bookkeeping / tunables
        task_id: str = "",
        mode: str = "transparent_proxy",
        disable_thinking: bool | None = None,
        max_tokens_cap: int = 4096,
        top_logprobs: int = 5,
        agent_timeout_s: float = 600.0,
        template: str = "",
    ) -> RolloutResult:
        """Execute a single OpenCode rollout and return its typed result.

        Args:
            endpoint: Shorthand endpoint selector (``"vllm"`` | ``"openai"``
                | ``"hf_router"``) — an alternative to spelling out the
                explicit ``base_url`` / ``api_key`` / ``model`` fields.
            base_url: OpenAI-compatible LLM endpoint (with trailing /v1).
            api_key: Bearer token for the LLM. Use ``"intercepted"`` for vLLM
                if it doesn't enforce auth.
            model: Model id understood by the LLM endpoint
                (e.g. ``"gpt-4o-mini"``, ``"Qwen/Qwen3.5-4B"``,
                ``"Qwen/Qwen3-4B-Instruct-2507:nscale"``).
            instruction: Prompt passed to ``opencode run``.
            setup: Bash commands run sequentially **before** the agent starts.
                Each command runs in the sandbox; non-zero exit aborts setup.
            verify: Bash commands run sequentially **after** the agent exits.
                Reward = ``passed_count / total`` unless any command writes a
                float to ``/home/user/logs/verifier/reward.txt`` (override).
            task_id: Echoed back in the result for traceability.
            mode: ``"transparent_proxy"`` (captures per-token logprobs via
                an in-sandbox FastAPI proxy) or ``"black_box"`` (no proxy).
            disable_thinking: Inject
                ``chat_template_kwargs.enable_thinking=false`` on forwarded
                requests. Needed for Qwen3.5 vLLM; harmless on Instruct
                variants; rejected by OpenAI direct.
            max_tokens_cap: Clamp on per-turn ``max_tokens``. OpenCode asks
                for ~32k by default; gpt-4o-mini caps at 16k.
            top_logprobs: Top-k logprobs requested upstream. HF Router caps
                at 5; OpenAI accepts up to 20; vLLM is unbounded.
            agent_timeout_s: Hard wall-clock budget for one ``opencode run``.
            template: E2B template name (e.g. ``"opencode-rl"``). Empty
                string uses the default (slow) base image.

        Returns:
            A :class:`RolloutResult` with reward, per-turn logprobs, file
            outputs, setup/verify results, and diagnostic tails.
        """
        # Assemble the tool arguments once so the call site stays flat;
        # list() copies guard against callers mutating their lists later.
        payload: dict[str, Any] = {
            "endpoint": endpoint,
            "base_url": base_url,
            "api_key": api_key,
            "model": model,
            "instruction": instruction,
            "setup": [] if setup is None else list(setup),
            "verify": [] if verify is None else list(verify),
            "task_id": task_id,
            "mode": mode,
            "disable_thinking": disable_thinking,
            "max_tokens_cap": max_tokens_cap,
            "top_logprobs": top_logprobs,
            "agent_timeout_s": agent_timeout_s,
            "template": template,
        }
        raw = self.call_tool("run_rollout", **payload)
        # The MCP layer may hand back several shapes; _extract_text
        # normalizes them to the JSON string the model was serialized as.
        return RolloutResult.model_validate_json(_extract_text(raw))
def _extract_text(result: Any) -> str:
"""Pull the JSON text out of whatever shape the MCP layer returns.
Handles the three shapes :meth:`MCPToolClient.call_tool` may surface:
a raw string, a ``CallToolObservation``-like object with
``.result.content[0].text``, or a dict with ``content[0]["text"]``.
"""
if isinstance(result, str):
return result
inner = getattr(result, "result", None)
if inner is not None:
content = getattr(inner, "content", None)
if content:
first = content[0]
text = getattr(first, "text", None)
if isinstance(text, str):
return text
if isinstance(first, dict) and "text" in first:
return first["text"]
if isinstance(result, dict):
content = result.get("content")
if isinstance(content, list) and content:
first = content[0]
if isinstance(first, dict) and "text" in first:
return first["text"]
nested = result.get("result")
if isinstance(nested, dict):
content = nested.get("content")
if isinstance(content, list) and content:
first = content[0]
if isinstance(first, dict) and "text" in first:
return first["text"]
return json.dumps(result, default=str)
content = getattr(result, "content", None)
if content:
first = content[0]
text = getattr(first, "text", None)
if isinstance(text, str):
return text
return str(result)