File size: 6,501 Bytes
70f2179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Client for the deployed opencode_env server.

The server exposes a single MCP tool ``run_rollout`` that runs one OpenCode
rollout in an E2B sandbox and returns a JSON-serialized :class:`RolloutResult`.

Example::

    from opencode_env import OpenCodeEnv

    with OpenCodeEnv(base_url="https://adithya-sk-opencode-env.hf.space") as env:
        env.reset()
        result = env.run_rollout(
            base_url="https://api.openai.com/v1",
            api_key=os.environ["OPENAI_API_KEY"],
            model="gpt-4o-mini",
            instruction="Create binary_search.py exposing def binary_search(arr, target) -> int...",
            setup=[],
            verify=["python /home/user/test.py"],
            task_id="binary_search_v1",
        )
        print(result.reward, len(result.proxy_turns))
"""

from __future__ import annotations

import json
from typing import Any

from openenv.core.mcp_client import MCPToolClient

try:
    from .models import RolloutResult
except ImportError:  # pragma: no cover
    from models import RolloutResult  # type: ignore


class OpenCodeEnv(MCPToolClient):
    """Typed client for the opencode_env MCP server.

    All transport-level behavior (``reset`` / ``call_tool`` / ``list_tools``
    / ``from_docker_image`` / context-manager support) is inherited from
    :class:`MCPToolClient`; this subclass only adds a typed wrapper around
    the server's single ``run_rollout`` tool.
    """

    def run_rollout(
        self,
        *,
        # Endpoint — pass either the shorthand selector OR explicit fields.
        endpoint: str = "",                # "vllm" | "openai" | "hf_router"
        base_url: str = "",
        api_key: str = "",
        model: str = "",
        # Task — the "list of bash commands" shape
        instruction: str,
        setup: list[str] | None = None,
        verify: list[str] | None = None,
        # Bookkeeping / tunables
        task_id: str = "",
        mode: str = "transparent_proxy",
        disable_thinking: bool | None = None,
        max_tokens_cap: int = 4096,
        top_logprobs: int = 5,
        agent_timeout_s: float = 600.0,
        template: str = "",
    ) -> RolloutResult:
        """Execute one OpenCode rollout on the server and parse the result.

        Args:
            endpoint: Shorthand endpoint selector (``"vllm"`` | ``"openai"``
                | ``"hf_router"``) — an alternative to filling in the
                explicit ``base_url`` / ``api_key`` / ``model`` fields.
            base_url: OpenAI-compatible LLM endpoint (with trailing /v1).
            api_key: Bearer token for the LLM. Use ``"intercepted"`` for vLLM
                if it doesn't enforce auth.
            model: Model id understood by the LLM endpoint
                (e.g. ``"gpt-4o-mini"``, ``"Qwen/Qwen3.5-4B"``,
                ``"Qwen/Qwen3-4B-Instruct-2507:nscale"``).
            instruction: Prompt passed to ``opencode run``.
            setup: Bash commands run sequentially **before** the agent
                starts; any non-zero exit aborts setup.
            verify: Bash commands run sequentially **after** the agent
                exits. Reward = ``passed_count / total`` unless a command
                writes a float to ``/home/user/logs/verifier/reward.txt``
                (override).
            task_id: Echoed back in the result for traceability.
            mode: ``"transparent_proxy"`` (captures per-token logprobs via
                an in-sandbox FastAPI proxy) or ``"black_box"`` (no proxy).
            disable_thinking: Inject
                ``chat_template_kwargs.enable_thinking=false`` on forwarded
                requests. Needed for Qwen3.5 vLLM; harmless on Instruct
                variants; rejected by OpenAI direct.
            max_tokens_cap: Clamp on per-turn ``max_tokens``. OpenCode asks
                for ~32k by default; gpt-4o-mini caps at 16k.
            top_logprobs: Top-k logprobs requested upstream. HF Router caps
                at 5; OpenAI accepts up to 20; vLLM is unbounded.
            agent_timeout_s: Hard wall-clock budget for one ``opencode run``.
            template: E2B template name (e.g. ``"opencode-rl"``). Empty
                string uses the default (slow) base image.

        Returns:
            A :class:`RolloutResult` with reward, per-turn logprobs, file
            outputs, setup/verify results, and diagnostic tails.
        """
        # Assemble the tool arguments once, then splat; ``setup``/``verify``
        # are defensively copied so a caller's list is never shared.
        payload: dict[str, Any] = {
            "endpoint": endpoint,
            "base_url": base_url,
            "api_key": api_key,
            "model": model,
            "instruction": instruction,
            "setup": list(setup or []),
            "verify": list(verify or []),
            "task_id": task_id,
            "mode": mode,
            "disable_thinking": disable_thinking,
            "max_tokens_cap": max_tokens_cap,
            "top_logprobs": top_logprobs,
            "agent_timeout_s": agent_timeout_s,
            "template": template,
        }
        response = self.call_tool("run_rollout", **payload)
        return RolloutResult.model_validate_json(_extract_text(response))


def _extract_text(result: Any) -> str:
    """Pull the JSON text out of whatever shape the MCP layer returns.

    Handles the three shapes :meth:`MCPToolClient.call_tool` may surface:
    a raw string, a ``CallToolObservation``-like object with
    ``.result.content[0].text``, or a dict with ``content[0]["text"]``.
    """
    if isinstance(result, str):
        return result

    inner = getattr(result, "result", None)
    if inner is not None:
        content = getattr(inner, "content", None)
        if content:
            first = content[0]
            text = getattr(first, "text", None)
            if isinstance(text, str):
                return text
            if isinstance(first, dict) and "text" in first:
                return first["text"]

    if isinstance(result, dict):
        content = result.get("content")
        if isinstance(content, list) and content:
            first = content[0]
            if isinstance(first, dict) and "text" in first:
                return first["text"]
        nested = result.get("result")
        if isinstance(nested, dict):
            content = nested.get("content")
            if isinstance(content, list) and content:
                first = content[0]
                if isinstance(first, dict) and "text" in first:
                    return first["text"]
        return json.dumps(result, default=str)

    content = getattr(result, "content", None)
    if content:
        first = content[0]
        text = getattr(first, "text", None)
        if isinstance(text, str):
            return text

    return str(result)