Spaces:

AdithyaSK
/

opencode-env

Running

App Files Files Community

opencode-env / client.py

AdithyaSK HF Staff

Upload folder using huggingface_hub

70f2179 verified 7 days ago

raw

history blame contribute delete

6.5 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""Client for the deployed opencode_env server.

	The server exposes a single MCP tool ``run_rollout`` that runs one OpenCode
	rollout in an E2B sandbox and returns a JSON-serialized :class:`RolloutResult`.

	Example::

	    import os

	    from opencode_env import OpenCodeEnv

	    with OpenCodeEnv(base_url="https://adithya-sk-opencode-env.hf.space") as env:
	        env.reset()
	        result = env.run_rollout(
	            base_url="https://api.openai.com/v1",
	            api_key=os.environ["OPENAI_API_KEY"],
	            model="gpt-4o-mini",
	            instruction="Create binary_search.py exposing def binary_search(arr, target) -> int...",
	            setup=[],
	            verify=["python /home/user/test.py"],
	            task_id="binary_search_v1",
	        )
	        print(result.reward, len(result.proxy_turns))
	"""

	from __future__ import annotations

	import json
	from typing import Any

	from openenv.core.mcp_client import MCPToolClient

	try:
	from .models import RolloutResult
	except ImportError: # pragma: no cover
	from models import RolloutResult # type: ignore


	class OpenCodeEnv(MCPToolClient):
	    """Typed client for the opencode_env MCP server.

	    Inherits ``reset`` / ``call_tool`` / ``list_tools`` / ``from_docker_image``
	    / context-manager semantics from :class:`MCPToolClient`.
	    """

	    def run_rollout(
	        self,
	        *,
	        # Endpoint — pass either the shorthand selector OR explicit fields.
	        endpoint: str = "",  # "vllm" | "openai" | "hf_router"
	        base_url: str = "",
	        api_key: str = "",
	        model: str = "",
	        # Task — the "list of bash commands" shape
	        instruction: str,
	        setup: list[str] | None = None,
	        verify: list[str] | None = None,
	        # Bookkeeping / tunables
	        task_id: str = "",
	        mode: str = "transparent_proxy",
	        disable_thinking: bool | None = None,
	        max_tokens_cap: int = 4096,
	        top_logprobs: int = 5,
	        agent_timeout_s: float = 600.0,
	        template: str = "",
	    ) -> RolloutResult:
	        """Run one OpenCode rollout and return the typed result.

	        All arguments are keyword-only and are forwarded unchanged to the
	        server-side ``run_rollout`` MCP tool; ``setup`` and ``verify`` are
	        copied into fresh lists first (``None`` becomes ``[]``).

	        Args:
	            endpoint: Shorthand endpoint selector (``"vllm"``, ``"openai"``
	                or ``"hf_router"``). Pass either this or the explicit
	                ``base_url`` / ``api_key`` / ``model`` fields. How the
	                shorthand is resolved is decided by the server and is not
	                visible from this client.
	            base_url: OpenAI-compatible LLM endpoint (with trailing /v1).
	            api_key: Bearer token for the LLM. Use ``"intercepted"`` for vLLM
	                if it doesn't enforce auth.
	            model: Model id understood by the LLM endpoint
	                (e.g. ``"gpt-4o-mini"``, ``"Qwen/Qwen3.5-4B"``,
	                ``"Qwen/Qwen3-4B-Instruct-2507:nscale"``).
	            instruction: Prompt passed to ``opencode run``.
	            setup: Bash commands run sequentially before the agent starts.
	                Each command runs in the sandbox; non-zero exit aborts setup.
	            verify: Bash commands run sequentially after the agent exits.
	                Reward = ``passed_count / total`` unless any command writes a
	                float to ``/home/user/logs/verifier/reward.txt`` (override).
	            task_id: Echoed back in the result for traceability.
	            mode: ``"transparent_proxy"`` (captures per-token logprobs via
	                an in-sandbox FastAPI proxy) or ``"black_box"`` (no proxy).
	            disable_thinking: Inject
	                ``chat_template_kwargs.enable_thinking=false`` on forwarded
	                requests. Needed for Qwen3.5 vLLM; harmless on Instruct
	                variants; rejected by OpenAI direct.
	            max_tokens_cap: Clamp on per-turn ``max_tokens``. OpenCode asks
	                for ~32k by default; gpt-4o-mini caps at 16k.
	            top_logprobs: Top-k logprobs requested upstream. HF Router caps
	                at 5; OpenAI accepts up to 20; vLLM is unbounded.
	            agent_timeout_s: Hard wall-clock budget for one ``opencode run``.
	            template: E2B template name (e.g. ``"opencode-rl"``). Empty
	                string uses the default (slow) base image.

	        Returns:
	            A :class:`RolloutResult` with reward, per-turn logprobs, file
	            outputs, setup/verify results, and diagnostic tails.
	        """
	        raw = self.call_tool(
	            "run_rollout",
	            endpoint=endpoint,
	            base_url=base_url,
	            api_key=api_key,
	            model=model,
	            instruction=instruction,
	            # Copy so the payload never aliases the caller's lists, and so
	            # ``None`` is sent as an empty list.
	            setup=list(setup or []),
	            verify=list(verify or []),
	            task_id=task_id,
	            mode=mode,
	            disable_thinking=disable_thinking,
	            max_tokens_cap=max_tokens_cap,
	            top_logprobs=top_logprobs,
	            agent_timeout_s=agent_timeout_s,
	            template=template,
	        )
	        # The tool replies with JSON text wrapped in an MCP-layer container;
	        # unwrap it, then validate into the typed result model.
	        return RolloutResult.model_validate_json(_extract_text(raw))


	def _lookup(container: Any, key: str) -> Any:
	    """Return ``container[key]`` for dicts, else ``container.key``; ``None`` if absent."""
	    if isinstance(container, dict):
	        return container.get(key)
	    return getattr(container, key, None)


	def _first_text(content: Any) -> str | None:
	    """Return the ``text`` of the first item of *content*, or ``None``.

	    The first item may expose ``text`` as an attribute or as a dict key.
	    Empty, missing or non-indexable *content* and non-string ``text``
	    values all yield ``None`` so the caller can try its next candidate.
	    """
	    if not content:
	        return None
	    try:
	        first = content[0]
	    except (TypeError, KeyError, IndexError):
	        # Truthy but not positionally indexable (e.g. an int or a mapping).
	        return None
	    text = getattr(first, "text", None)
	    if not isinstance(text, str) and isinstance(first, dict):
	        text = first.get("text")
	    return text if isinstance(text, str) else None


	def _extract_text(result: Any) -> str:
	    """Pull the JSON text out of whatever shape the MCP layer returns.

	    Shapes handled, in order of precedence:

	    * a raw string, returned unchanged;
	    * an object whose ``.result`` is itself a string, returned as-is;
	    * an object whose ``.result`` (object or dict) carries
	      ``content[0].text`` / ``content[0]["text"]``;
	    * a dict with ``content[0]["text"]``, directly or under ``"result"``;
	    * an object with ``.content[0].text``.

	    Args:
	        result: Value returned by :meth:`MCPToolClient.call_tool`.

	    Returns:
	        The extracted text. When no text can be found, a dict is serialized
	        with ``json.dumps(result, default=str)`` and anything else with
	        ``str(result)``.
	    """
	    if isinstance(result, str):
	        return result

	    inner = getattr(result, "result", None)
	    if isinstance(inner, str):
	        # Observation wrapper whose payload is already the JSON text.
	        return inner

	    # Candidate ``content`` containers, most specific first.
	    candidates = [_lookup(inner, "content")]
	    if isinstance(result, dict):
	        candidates.append(result.get("content"))
	        nested = result.get("result")
	        if isinstance(nested, dict):
	            candidates.append(nested.get("content"))
	    else:
	        candidates.append(getattr(result, "content", None))

	    for content in candidates:
	        text = _first_text(content)
	        if text is not None:
	            return text

	    # Nothing text-like found: fall back to a best-effort serialization.
	    if isinstance(result, dict):
	        return json.dumps(result, default=str)
	    return str(result)