Spaces:
Running
Running
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """Client for the deployed opencode_env server. | |
| The server exposes a single MCP tool ``run_rollout`` that runs one OpenCode | |
| rollout in an E2B sandbox and returns a JSON-serialized :class:`RolloutResult`. | |
| Example:: | |
| import os | |
| from opencode_env import OpenCodeEnv | |
| with OpenCodeEnv(base_url="https://adithya-sk-opencode-env.hf.space") as env: | |
| env.reset() | |
| result = env.run_rollout( | |
| base_url="https://api.openai.com/v1", | |
| api_key=os.environ["OPENAI_API_KEY"], | |
| model="gpt-4o-mini", | |
| instruction="Create binary_search.py exposing def binary_search(arr, target) -> int...", | |
| setup=[], | |
| verify=["python /home/user/test.py"], | |
| task_id="binary_search_v1", | |
| ) | |
| print(result.reward, len(result.proxy_turns)) | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from typing import Any | |
| from openenv.core.mcp_client import MCPToolClient | |
| try: | |
| from .models import RolloutResult | |
| except ImportError: # pragma: no cover | |
| from models import RolloutResult # type: ignore | |
class OpenCodeEnv(MCPToolClient):
    """Thin, typed wrapper around the opencode_env MCP server.

    ``reset``, ``call_tool``, ``list_tools``, ``from_docker_image`` and the
    context-manager protocol are all inherited from :class:`MCPToolClient`;
    this class only adds a typed front end for the ``run_rollout`` tool.
    """

    def run_rollout(
        self,
        *,
        # Endpoint: give the shorthand selector, or spell the fields out.
        endpoint: str = "",  # "vllm" | "openai" | "hf_router"
        base_url: str = "",
        api_key: str = "",
        model: str = "",
        # Task, expressed as lists of bash commands.
        instruction: str,
        setup: list[str] | None = None,
        verify: list[str] | None = None,
        # Bookkeeping and tunables.
        task_id: str = "",
        mode: str = "transparent_proxy",
        disable_thinking: bool | None = None,
        max_tokens_cap: int = 4096,
        top_logprobs: int = 5,
        agent_timeout_s: float = 600.0,
        template: str = "",
    ) -> RolloutResult:
        """Execute a single OpenCode rollout and parse the server's reply.

        All arguments are keyword-only and are forwarded unchanged to the
        server's ``run_rollout`` MCP tool.

        Args:
            endpoint: Shorthand endpoint selector (``"vllm"``, ``"openai"``
                or ``"hf_router"``). Pass either this or the explicit
                ``base_url`` / ``api_key`` / ``model`` fields.
            base_url: OpenAI-compatible LLM endpoint, including the
                trailing ``/v1``.
            api_key: Bearer token for the LLM. ``"intercepted"`` can be
                used for a vLLM server that does not enforce auth.
            model: Model id as the LLM endpoint knows it, e.g.
                ``"gpt-4o-mini"``, ``"Qwen/Qwen3.5-4B"`` or
                ``"Qwen/Qwen3-4B-Instruct-2507:nscale"``.
            instruction: Prompt handed to ``opencode run``.
            setup: Bash commands executed in order inside the sandbox
                **before** the agent starts; a non-zero exit aborts setup.
                ``None`` is sent as an empty list.
            verify: Bash commands executed in order **after** the agent
                exits. The reward is ``passed_count / total`` unless a
                command writes a float to
                ``/home/user/logs/verifier/reward.txt``, which overrides it.
                ``None`` is sent as an empty list.
            task_id: Identifier echoed back in the result for traceability.
            mode: ``"transparent_proxy"`` (an in-sandbox FastAPI proxy
                captures per-token logprobs) or ``"black_box"`` (no proxy).
            disable_thinking: When set, injects
                ``chat_template_kwargs.enable_thinking=false`` into forwarded
                requests. Required for Qwen3.5 on vLLM, harmless on Instruct
                variants, rejected by OpenAI direct.
            max_tokens_cap: Upper clamp applied to per-turn ``max_tokens``.
                OpenCode requests ~32k by default while gpt-4o-mini caps at
                16k.
            top_logprobs: Number of top-k logprobs requested upstream. HF
                Router caps at 5, OpenAI accepts up to 20, vLLM is unbounded.
            agent_timeout_s: Hard wall-clock budget for one ``opencode run``.
            template: E2B template name (e.g. ``"opencode-rl"``). An empty
                string selects the default (slow) base image.

        Returns:
            A :class:`RolloutResult` holding the reward, per-turn logprobs,
            file outputs, setup/verify results, and diagnostic tails.
        """
        # Insertion order mirrors the parameter order of the signature.
        tool_args: dict[str, Any] = {
            "endpoint": endpoint,
            "base_url": base_url,
            "api_key": api_key,
            "model": model,
            "instruction": instruction,
            # Always send fresh lists so ``None`` never reaches the server.
            "setup": list(setup) if setup else [],
            "verify": list(verify) if verify else [],
            "task_id": task_id,
            "mode": mode,
            "disable_thinking": disable_thinking,
            "max_tokens_cap": max_tokens_cap,
            "top_logprobs": top_logprobs,
            "agent_timeout_s": agent_timeout_s,
            "template": template,
        }
        response = self.call_tool("run_rollout", **tool_args)
        payload = _extract_text(response)
        return RolloutResult.model_validate_json(payload)
def _first_text(content: Any) -> str | None:
    """Return the ``text`` of the first item in *content*, if it is a string.

    Args:
        content: Candidate MCP content list. Items may be objects exposing a
            ``.text`` attribute or dicts carrying a ``"text"`` key.

    Returns:
        The first item's text, or ``None`` when *content* is empty, is not
        indexable by position, or its first item has no string text.
    """
    if not content:
        return None
    try:
        first = content[0]
    except (TypeError, KeyError, IndexError):
        # Not a positional sequence (e.g. a mapping or an iterator).
        return None
    text = getattr(first, "text", None)
    if not isinstance(text, str) and isinstance(first, dict):
        text = first.get("text")
    return text if isinstance(text, str) else None


def _extract_text(result: Any) -> str:
    """Pull the JSON text out of whatever shape the MCP layer returns.

    Shapes handled, in lookup order:

    * a raw string, returned unchanged;
    * an object: ``.result.content[0]`` first, then ``.content[0]``;
    * a dict: ``["content"][0]`` first, then ``["result"]["content"][0]``.

    The nested ``result`` value may itself be an object or a plain dict, and
    content items may be objects with ``.text`` or dicts with ``"text"``.
    If no content text is found but the nested ``result`` is a string, that
    string is returned.

    Args:
        result: Value returned by :meth:`MCPToolClient.call_tool`.

    Returns:
        The extracted text. When nothing matches, a dict is serialized with
        ``json.dumps(result, default=str)`` and anything else with
        ``str(result)``, so the return value is always a ``str``.
    """
    if isinstance(result, str):
        return result

    is_mapping = isinstance(result, dict)
    nested = result.get("result") if is_mapping else getattr(result, "result", None)

    # Keep the historical precedence: dicts look at their own content before
    # the nested result, objects look at the nested result first.
    candidates = (result, nested) if is_mapping else (nested, result)
    for candidate in candidates:
        if isinstance(candidate, dict):
            content = candidate.get("content")
        else:
            content = getattr(candidate, "content", None)
        text = _first_text(content)
        if text is not None:
            return text

    # Only reached when no content text exists, so previously successful
    # lookups are unaffected by this extra unwrapping step.
    if isinstance(nested, str):
        return nested

    if is_mapping:
        return json.dumps(result, default=str)
    return str(result)