Spaces:
Running
Running
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """Unit tests for OpenCodeSession / OpenCodeSessionFactory (no sandbox).""" | |
| from __future__ import annotations | |
| import pytest | |
| from opencode_env.config import OpenCodeConfig | |
| from opencode_env.harness import OpenCodeSession, OpenCodeSessionFactory | |
| from opencode_env.sandbox.base import ExecResult | |
| from opencode_env.task import OpenCodeTask | |
class _FakeBgJob:
    """Stand-in for a background job handle; no real process is involved."""

    def __init__(self) -> None:
        # Fixed dummy pid, and a flag that ``kill`` flips to True.
        self.pid, self._killed = 123, False

    def kill(self) -> None:
        """Record that termination was requested."""
        self._killed = True

    def wait(self, timeout: float | None = None) -> int:
        """Return exit status 0 right away; ``timeout`` is ignored."""
        return 0
class _FakeSandbox:
    """Sandbox double that keeps everything in memory and logs each call.

    ``install_exit`` is the exit code reported for the opencode install
    command; ``setup_exit`` is the exit code reported for every other
    command except the ``echo ok`` health probe, which always succeeds.
    """

    def __init__(self, *, install_exit: int = 0, setup_exit: int = 0) -> None:
        self.sandbox_id = "fake-sbx"
        # (command, envs) pairs seen by ``exec`` / ``start_bg`` respectively.
        self.exec_calls: list[tuple[str, dict | None]] = []
        self.bg_calls: list[tuple[str, dict | None]] = []
        # path -> content for everything passed to ``write_text``.
        self.written: dict[str, str] = {}
        self.killed = False
        self._install_exit = install_exit
        self._setup_exit = setup_exit

    def exec(self, cmd, *, envs=None, cwd=None, timeout=60):
        """Log the command and return a canned ``ExecResult`` for it."""
        self.exec_calls.append((cmd, envs))
        if cmd.strip() == "echo ok":
            # Health probe: the factory issues ``echo ok`` up to 15 times
            # before doing anything else; this fake is ready on the first try.
            code, stdout = 0, "ok\n"
        elif "opencode.ai/install" in cmd:
            code, stdout = self._install_exit, "opencode 0.0.0\n"
        else:
            code, stdout = self._setup_exit, ""
        return ExecResult(code, stdout, "")

    def start_bg(self, cmd, *, envs=None, cwd=None):
        """Log the background launch and hand back a fake job handle."""
        self.bg_calls.append((cmd, envs))
        return _FakeBgJob()

    def write_text(self, path, content):
        """Store ``content`` under ``path`` in the in-memory file map."""
        self.written[path] = content

    def read_text(self, path):
        """Return the stored content for ``path``, or ``""`` if never written."""
        if path in self.written:
            return self.written[path]
        return ""

    def exists(self, path):
        """Report whether ``path`` has been written."""
        return path in self.written

    def kill(self):
        """Mark the sandbox as killed."""
        self.killed = True
class _FakeBackend:
    """Sandbox backend double that always hands out one pre-built sandbox."""

    def __init__(self, sandbox: _FakeSandbox) -> None:
        self.create_calls = 0
        self._sandbox = sandbox

    def create(self, *, timeout_s=900, envs=None, metadata=None):
        """Count the call and return the wrapped sandbox; arguments are ignored."""
        self.create_calls = self.create_calls + 1
        return self._sandbox
def _config(**overrides) -> OpenCodeConfig:
    """Build an ``OpenCodeConfig`` from test defaults, with ``overrides`` winning."""
    defaults = {
        "provider": "openai",
        "base_url": "https://api.openai.com/v1",
        "api_key": "sk-fake",
        "model": "openai/gpt-5.3-codex",
    }
    # Later unpacking takes precedence, so caller-supplied values replace defaults.
    return OpenCodeConfig(**{**defaults, **overrides})
def test_factory_bootstraps_and_starts_agent():
    """Creating a session provisions the sandbox and launches the agent once.

    Checks the install command was issued, the opencode config and task
    instruction were written, and the provider credentials reached the
    background process environment.
    """
    fake_sandbox = _FakeSandbox()
    fake_backend = _FakeBackend(fake_sandbox)
    factory = OpenCodeSessionFactory(config=_config(), sandbox_backend=fake_backend)

    created = factory.create(task="solve fizzbuzz")

    assert fake_backend.create_calls == 1
    issued_commands = [command for command, _envs in fake_sandbox.exec_calls]
    assert any("opencode.ai/install" in command for command in issued_commands)

    files = fake_sandbox.written
    assert "/home/user/.config/opencode/opencode.json" in files
    assert files["/home/user/task/instruction.md"] == "solve fizzbuzz"

    launches = fake_sandbox.bg_calls
    assert len(launches) == 1, "agent must be started in background"

    # The provider endpoint and key must be injected into the process env.
    launch_env = launches[0][1]
    assert launch_env["OPENAI_BASE_URL"] == "https://api.openai.com/v1"
    assert launch_env["OPENAI_API_KEY"] == "sk-fake"

    assert isinstance(created, OpenCodeSession)
def test_factory_runs_task_setup_shell():
    """The task's ``setup_shell`` is executed verbatim, exactly once."""
    fake_sandbox = _FakeSandbox()
    factory = OpenCodeSessionFactory(
        config=_config(),
        sandbox_backend=_FakeBackend(fake_sandbox),
    )
    factory.create(
        task=OpenCodeTask(instruction="x", setup_shell="pip install pytest")
    )

    pip_commands = []
    for command, _envs in fake_sandbox.exec_calls:
        if "pip install" in command:
            pip_commands.append(command)

    assert pip_commands == ["pip install pytest"]
def test_factory_uploads_extra_files():
    """Entries in the task's ``upload_files`` are written into the sandbox."""
    fake_sandbox = _FakeSandbox()
    factory = OpenCodeSessionFactory(
        config=_config(),
        sandbox_backend=_FakeBackend(fake_sandbox),
    )
    destination, source_text = "/home/user/workdir/hello.py", "print('hi')"

    factory.create(
        task=OpenCodeTask(
            instruction="run it",
            upload_files={destination: source_text},
        )
    )

    assert fake_sandbox.written[destination] == source_text
def test_factory_kills_sandbox_on_install_failure():
    """A non-zero install exit raises ``RuntimeError`` and kills the sandbox."""
    failing_sandbox = _FakeSandbox(install_exit=1)
    factory = OpenCodeSessionFactory(
        config=_config(),
        sandbox_backend=_FakeBackend(failing_sandbox),
    )

    with pytest.raises(RuntimeError, match="install failed"):
        factory.create(task="x")

    # The factory must not leave the sandbox behind after a failed bootstrap.
    assert failing_sandbox.killed
def test_factory_accepts_transparent_proxy_mode():
    """``mode="transparent_proxy"`` is accepted and stored on the factory."""
    backend = _FakeBackend(_FakeSandbox())
    factory = OpenCodeSessionFactory(
        config=_config(), sandbox_backend=backend, mode="transparent_proxy"
    )
    assert factory._mode == "transparent_proxy"
def test_factory_rejects_unknown_mode():
    """Constructing the factory with an unrecognised mode raises ``ValueError``."""
    with pytest.raises(ValueError, match="Unknown mode"):
        # Arguments are built inside the context, in the same order as a
        # direct keyword call, so the guarded region is unchanged.
        factory_kwargs = {
            "config": _config(),
            "sandbox_backend": _FakeBackend(_FakeSandbox()),
            "mode": "bogus",
        }
        OpenCodeSessionFactory(**factory_kwargs)
def test_session_initial_messages():
    """The task instruction becomes a single user message."""
    session = OpenCodeSession(
        sandbox=_FakeSandbox(),
        config=_config(),
        task=OpenCodeTask(instruction="hi"),
    )
    expected = [{"role": "user", "content": "hi"}]
    assert session.initial_messages() == expected
def test_session_verify_without_verifier_returns_none_reward():
    """With no verifier configured, ``verify`` reports done and no reward."""
    session = OpenCodeSession(
        sandbox=_FakeSandbox(),
        config=_config(),
        task=OpenCodeTask(instruction="x"),
    )

    outcome = session.verify(transcript=[])

    assert outcome.env_reward is None
    assert outcome.done is True
def test_session_verify_calls_user_verifier():
    """A user-supplied verifier is invoked once and its result is returned."""
    from openenv.core.harness import VerifyResult

    fake_sandbox = _FakeSandbox()
    invocations = []

    # Parameter names are kept as ``sandbox`` / ``task`` in case the session
    # passes them by keyword.
    def verifier(sandbox, task):
        invocations.append((sandbox.sandbox_id, task.instruction))
        return VerifyResult(env_reward=1.0, done=True, metrics={"tests": "pass"})

    session = OpenCodeSession(
        sandbox=fake_sandbox,
        config=_config(),
        task=OpenCodeTask(instruction="do"),
        verifier=verifier,
    )

    outcome = session.verify(transcript=[])

    assert invocations == [("fake-sbx", "do")]
    assert outcome.env_reward == 1.0
    assert outcome.metrics == {"tests": "pass"}
def test_session_close_kills_job_and_sandbox():
    """``close`` clears the background job reference and kills the sandbox."""
    fake_sandbox = _FakeSandbox()
    session = OpenCodeSession(
        sandbox=fake_sandbox,
        config=_config(),
        task=OpenCodeTask(instruction="x"),
    )
    # Simulate an agent already running in the background.
    session._bg_job = _FakeBgJob()

    session.close()

    assert session._bg_job is None
    assert fake_sandbox.killed