# File: opencode-env/tests/test_harness.py
# Last commit 70f2179 (verified) by AdithyaSK (HF Staff): "Upload folder using huggingface_hub"
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Unit tests for OpenCodeSession / OpenCodeSessionFactory (no sandbox)."""
from __future__ import annotations
import pytest
from opencode_env.config import OpenCodeConfig
from opencode_env.harness import OpenCodeSession, OpenCodeSessionFactory
from opencode_env.sandbox.base import ExecResult
from opencode_env.task import OpenCodeTask
class _FakeBgJob:
    """Stand-in for a background job handle.

    Carries a fixed ``pid`` and remembers whether ``kill`` has been called
    via the private ``_killed`` flag.
    """

    def __init__(self) -> None:
        self._killed = False
        self.pid = 123

    def wait(self, timeout: float | None = None) -> int:
        """Return exit code 0 immediately; ``timeout`` is ignored."""
        return 0

    def kill(self) -> None:
        """Record that the job was asked to terminate."""
        self._killed = True
class _FakeSandbox:
    """Sandbox double that stores files in memory and logs each call.

    ``install_exit`` is the exit code reported for the opencode install
    command; ``setup_exit`` is the exit code reported for every other
    command except the ``echo ok`` health probe.
    """

    def __init__(self, *, install_exit: int = 0, setup_exit: int = 0) -> None:
        self._install_exit = install_exit
        self._setup_exit = setup_exit
        self.sandbox_id = "fake-sbx"
        self.killed = False
        # path -> content for everything passed to ``write_text``.
        self.written: dict[str, str] = {}
        # (cmd, envs) pairs, in call order, for ``exec`` and ``start_bg``.
        self.exec_calls: list[tuple[str, dict | None]] = []
        self.bg_calls: list[tuple[str, dict | None]] = []

    def exec(self, cmd, *, envs=None, cwd=None, timeout=60):
        """Log the command and return a canned ``ExecResult`` for it."""
        self.exec_calls.append((cmd, envs))
        if cmd.strip() == "echo ok":
            # Health probe: the factory issues ``echo ok`` up to 15 times
            # before doing anything else. The fake is "ready" on the first try.
            exit_code, stdout = 0, "ok\n"
        elif "opencode.ai/install" in cmd:
            exit_code, stdout = self._install_exit, "opencode 0.0.0\n"
        else:
            exit_code, stdout = self._setup_exit, ""
        return ExecResult(exit_code, stdout, "")

    def start_bg(self, cmd, *, envs=None, cwd=None):
        """Log the background launch and hand back a fake job handle."""
        self.bg_calls.append((cmd, envs))
        return _FakeBgJob()

    def write_text(self, path, content):
        """Store ``content`` under ``path`` in the in-memory file table."""
        self.written[path] = content

    def read_text(self, path):
        """Return the stored content for ``path``, or ``""`` if never written."""
        return self.written.get(path, "")

    def exists(self, path):
        """Report whether ``path`` has been written."""
        return path in self.written

    def kill(self):
        """Flag the sandbox as killed."""
        self.killed = True
class _FakeBackend:
    """Backend double that always returns one pre-built sandbox.

    ``create_calls`` counts how many times ``create`` has been invoked.
    """

    def __init__(self, sandbox: _FakeSandbox) -> None:
        self.create_calls = 0
        self._sandbox = sandbox

    def create(self, *, timeout_s=900, envs=None, metadata=None):
        """Bump the call counter and return the wrapped sandbox; arguments are ignored."""
        self.create_calls = self.create_calls + 1
        return self._sandbox
def _config(**overrides) -> OpenCodeConfig:
    """Build an ``OpenCodeConfig`` from fake OpenAI defaults.

    Any keyword in ``overrides`` replaces the default of the same name, and
    keywords not among the defaults are passed through as well.
    """
    defaults = {
        "provider": "openai",
        "base_url": "https://api.openai.com/v1",
        "api_key": "sk-fake",
        "model": "openai/gpt-5.3-codex",
    }
    return OpenCodeConfig(**{**defaults, **overrides})
def test_factory_bootstraps_and_starts_agent():
    """Creating a session installs opencode, writes config/task files and launches the agent."""
    sandbox = _FakeSandbox()
    fake_backend = _FakeBackend(sandbox)

    session = OpenCodeSessionFactory(
        config=_config(), sandbox_backend=fake_backend
    ).create(task="solve fizzbuzz")

    assert fake_backend.create_calls == 1

    executed = [cmd for cmd, _envs in sandbox.exec_calls]
    assert any("opencode.ai/install" in cmd for cmd in executed)

    assert "/home/user/.config/opencode/opencode.json" in sandbox.written
    assert sandbox.written["/home/user/task/instruction.md"] == "solve fizzbuzz"
    assert len(sandbox.bg_calls) == 1, "agent must be started in background"

    # The provider endpoint and key must reach the agent through its
    # process environment.
    launch_env = sandbox.bg_calls[0][1]
    assert launch_env["OPENAI_BASE_URL"] == "https://api.openai.com/v1"
    assert launch_env["OPENAI_API_KEY"] == "sk-fake"

    assert isinstance(session, OpenCodeSession)
def test_factory_runs_task_setup_shell():
    """A task's ``setup_shell`` command is executed in the sandbox exactly once."""
    sandbox = _FakeSandbox()
    task = OpenCodeTask(instruction="x", setup_shell="pip install pytest")

    OpenCodeSessionFactory(
        config=_config(), sandbox_backend=_FakeBackend(sandbox)
    ).create(task=task)

    pip_cmds = [cmd for cmd, _envs in sandbox.exec_calls if "pip install" in cmd]
    assert pip_cmds == ["pip install pytest"]
def test_factory_uploads_extra_files():
    """Files listed in ``upload_files`` are written into the sandbox verbatim."""
    target = "/home/user/workdir/hello.py"
    sandbox = _FakeSandbox()
    task = OpenCodeTask(
        instruction="run it",
        upload_files={target: "print('hi')"},
    )

    OpenCodeSessionFactory(
        config=_config(), sandbox_backend=_FakeBackend(sandbox)
    ).create(task=task)

    assert sandbox.written[target] == "print('hi')"
def test_factory_kills_sandbox_on_install_failure():
    """A non-zero install exit raises ``RuntimeError`` and the sandbox is killed."""
    failing_sandbox = _FakeSandbox(install_exit=1)
    session_factory = OpenCodeSessionFactory(
        config=_config(),
        sandbox_backend=_FakeBackend(failing_sandbox),
    )

    with pytest.raises(RuntimeError, match="install failed"):
        session_factory.create(task="x")

    assert failing_sandbox.killed
def test_factory_accepts_transparent_proxy_mode():
    """``mode="transparent_proxy"`` is accepted and stored on the factory."""
    backend = _FakeBackend(_FakeSandbox())
    factory = OpenCodeSessionFactory(
        config=_config(),
        sandbox_backend=backend,
        mode="transparent_proxy",
    )
    assert factory._mode == "transparent_proxy"
def test_factory_rejects_unknown_mode():
    """Constructing the factory with an unrecognised mode raises ``ValueError``."""
    backend = _FakeBackend(_FakeSandbox())
    with pytest.raises(ValueError, match="Unknown mode"):
        OpenCodeSessionFactory(
            config=_config(),
            sandbox_backend=backend,
            mode="bogus",  # type: ignore[arg-type]
        )
def test_session_initial_messages():
    """The initial transcript is a single user message holding the task instruction."""
    session = OpenCodeSession(
        sandbox=_FakeSandbox(),
        config=_config(),
        task=OpenCodeTask(instruction="hi"),
    )
    expected = [{"role": "user", "content": "hi"}]
    assert session.initial_messages() == expected
def test_session_verify_without_verifier_returns_none_reward():
    """With no verifier configured, ``verify`` reports done and a ``None`` reward."""
    session = OpenCodeSession(
        sandbox=_FakeSandbox(),
        config=_config(),
        task=OpenCodeTask(instruction="x"),
    )

    outcome = session.verify(transcript=[])

    assert outcome.env_reward is None
    assert outcome.done is True
def test_session_verify_calls_user_verifier():
    """A user-supplied verifier gets the sandbox and task, and its result is returned."""
    from openenv.core.harness import VerifyResult

    seen = []

    def verifier(sandbox, task):
        # Record what the session passed in so the call can be checked below.
        seen.append((sandbox.sandbox_id, task.instruction))
        return VerifyResult(env_reward=1.0, done=True, metrics={"tests": "pass"})

    session = OpenCodeSession(
        sandbox=_FakeSandbox(),
        config=_config(),
        task=OpenCodeTask(instruction="do"),
        verifier=verifier,
    )

    outcome = session.verify(transcript=[])

    assert seen == [("fake-sbx", "do")]
    assert outcome.env_reward == 1.0
    assert outcome.metrics == {"tests": "pass"}
def test_session_close_kills_job_and_sandbox():
    """``close()`` kills the background job, drops its handle and kills the sandbox."""
    sbx = _FakeSandbox()
    session = OpenCodeSession(
        sandbox=sbx,
        config=_config(),
        task=OpenCodeTask(instruction="x"),
    )
    # Keep a local reference: the session clears ``_bg_job`` on close, so the
    # kill can only be observed through this handle afterwards.
    job = _FakeBgJob()
    session._bg_job = job

    session.close()

    # Without this check, a ``close()`` that merely dropped the handle and
    # left the job running would still pass.
    assert job._killed, "background job must be killed on close"
    assert session._bg_job is None
    assert sbx.killed