"""Smoke-test the live ForgeEnv Space end-to-end via the OpenEnv client.

Runs one full episode against the deployed Space:

    reset()            -> drift-gen turn
    step(DriftAction)  -> repair turn
    step(RepairAction) -> reward + verifier breakdown

This is the simplest possible "is the deployed env working?" check
and a clean standalone artifact for the hackathon writeup/video.

Usage::

    python scripts/test_live_env.py [seed]

The optional ``seed`` argument is an integer and defaults to 42.
"""
| from __future__ import annotations |
|
|
| import asyncio |
| import json |
|
|
| from openenv.core import GenericAction, GenericEnvClient |
|
|
# Base URL of the deployed ForgeEnv Space that this smoke test connects to.
ENV_URL = "https://akhiilll-forgeenv.hf.space"
|
|
|
|
def _summary(result, label: str) -> None:
    """Print a human-readable digest of one environment step result.

    Args:
        result: Object exposing ``observation``, ``reward`` and ``done``.
            Observation fields are read only when ``observation`` is a
            dict; otherwise each of them is reported as ``None``.
        label: Heading printed above the digest.
    """
    observation = result.observation
    fields = observation if isinstance(observation, dict) else {}

    print(f"\n=== {label} ===")
    print(f"phase : {fields.get('current_phase')}")
    print(f"task_id : {fields.get('task_id')}")
    print(f"target_category : {fields.get('target_category')}")
    print(f"reward : {result.reward}")
    print(f"done : {result.done}")

    reward_parts = fields.get("reward_breakdown")
    if reward_parts:
        print("reward_breakdown:")
        print(json.dumps(reward_parts, indent=2))

    # Prefer the current script; fall back to the broken one, then to nothing.
    source_text = fields.get("script_content") or fields.get("broken_script") or ""
    if not source_text:
        return

    # Show at most the first eight lines, plus a marker when more follow.
    all_lines = source_text.splitlines()
    print("script preview :")
    for row in all_lines[:8]:
        print(f" | {row}")
    if len(all_lines) > 8:
        print(" | ...")
|
|
|
|
async def main(seed: int = 42) -> None:
    """Run one reset + two-step episode against ``ENV_URL`` and print each turn.

    The episode is: ``reset()``, then a breakage step using the target
    category reported by the reset observation, then a repair step with an
    empty unified diff. A summary is printed after every call.

    Args:
        seed: Seed forwarded to ``client.reset()``.

    Raises:
        Whatever the client raises on connection or protocol errors; nothing
        is caught here so a failing smoke test exits with a traceback.
    """
    print(f"connecting to {ENV_URL} (seed={seed}) ...")

    # ``async with`` closes the client connection even when a call below
    # raises; previously the client was never closed.
    async with GenericEnvClient(base_url=ENV_URL) as client:
        res = await client.reset(seed=seed, options={"difficulty": "medium"})
        _summary(res, "after reset()")

        # Fall back to a default primitive when the observation is not a dict
        # or carries a missing/null target category.
        obs = res.observation if isinstance(res.observation, dict) else {}
        target = obs.get("target_category") or "RenameApiCall"

        res = await client.step(GenericAction(
            breakage={"action_type": "breakage", "primitive_type": target, "params": {}},
            repair=None,
        ))
        _summary(res, "after drift step (Challenger)")

        # An empty diff is a deliberate no-op repair: the goal is only to
        # exercise the round trip, not to solve the task.
        res = await client.step(GenericAction(
            breakage=None,
            repair={"action_type": "repair", "unified_diff": ""},
        ))
        _summary(res, "after repair step (Solver, no-op)")

        print("\nOK -- reset + 2 steps round-trip the deployed env.")
|
|
|
|
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default seed.
    cli_args = sys.argv[1:]
    seed = int(cli_args[0]) if cli_args else 42
    asyncio.run(main(seed=seed))
|
|