"""Smoke-test the live ForgeEnv Space end-to-end via the OpenEnv client. Runs one full episode against the deployed Space: reset() -> drift-gen turn step(DriftAction) -> repair turn step(RepairAction) -> reward + verifier breakdown This is the simplest possible "is the deployed env working?" check and a clean standalone artifact for the hackathon writeup/video. Usage:: python scripts/test_live_env.py """ from __future__ import annotations import asyncio import json from openenv.core import GenericAction, GenericEnvClient ENV_URL = "https://akhiilll-forgeenv.hf.space" def _summary(result, label: str) -> None: obs = result.observation if isinstance(result.observation, dict) else {} print(f"\n=== {label} ===") print(f"phase : {obs.get('current_phase')}") print(f"task_id : {obs.get('task_id')}") print(f"target_category : {obs.get('target_category')}") print(f"reward : {result.reward}") print(f"done : {result.done}") breakdown = obs.get("reward_breakdown") if breakdown: print("reward_breakdown:") print(json.dumps(breakdown, indent=2)) script = obs.get("script_content") or obs.get("broken_script") or "" if script: preview = script.splitlines()[:8] print("script preview :") for line in preview: print(f" | {line}") if len(script.splitlines()) > 8: print(" | ...") async def main(seed: int = 42) -> None: print(f"connecting to {ENV_URL} (seed={seed}) ...") client = GenericEnvClient(base_url=ENV_URL) res = await client.reset(seed=seed, options={"difficulty": "medium"}) _summary(res, "after reset()") target = res.observation.get("target_category", "RenameApiCall") if isinstance(res.observation, dict) else "RenameApiCall" res = await client.step(GenericAction( breakage={"action_type": "breakage", "primitive_type": target, "params": {}}, repair=None, )) _summary(res, "after drift step (Challenger)") # empty diff = no-op repair: shows the verifier marking the script as still broken res = await client.step(GenericAction( breakage=None, repair={"action_type": "repair", "unified_diff": ""}, )) _summary(res, "after repair step (Solver, no-op)") print("\nOK -- reset + 2 steps round-trip the deployed env.") if __name__ == "__main__": import sys seed = int(sys.argv[1]) if len(sys.argv) > 1 else 42 asyncio.run(main(seed=seed))