File size: 2,558 Bytes
"""Smoke-test the live ForgeEnv Space end-to-end via the OpenEnv client.

Runs one full episode against the deployed Space:

    reset()            -> drift-gen turn
    step(DriftAction)  -> repair turn
    step(RepairAction) -> reward + verifier breakdown

This is the simplest possible "is the deployed env working?" check
and a clean standalone artifact for the hackathon writeup/video.

Usage::

    python scripts/test_live_env.py [SEED]

``SEED`` is an optional integer; it defaults to 42 when omitted.
"""
from __future__ import annotations
import asyncio
import json
from openenv.core import GenericAction, GenericEnvClient
# Base URL of the deployed Space that this smoke test connects to; it is
# passed to the client as ``base_url`` and echoed in the "connecting" banner.
ENV_URL = "https://akhiilll-forgeenv.hf.space"
def _summary(result, label: str, *, preview_lines: int = 8) -> None:
    """Print a human-readable digest of one ``reset()``/``step()`` result.

    Args:
        result: Object exposing ``observation``, ``reward`` and ``done``
            attributes. Only a ``dict`` observation is inspected; any other
            observation type is treated as empty, so the observation-derived
            fields print as ``None``.
        label: Heading shown in the ``=== ... ===`` banner.
        preview_lines: Maximum number of script lines to echo. Defaults to 8,
            which matches the previously hard-coded limit.

    The reward breakdown is printed as indented JSON only when it is present
    and non-empty. The script preview uses ``script_content`` and falls back
    to ``broken_script``; a trailing ``...`` marks a truncated preview.
    """
    observation = result.observation
    obs = observation if isinstance(observation, dict) else {}

    print(f"\n=== {label} ===")
    print(f"phase : {obs.get('current_phase')}")
    print(f"task_id : {obs.get('task_id')}")
    print(f"target_category : {obs.get('target_category')}")
    print(f"reward : {result.reward}")
    print(f"done : {result.done}")

    breakdown = obs.get("reward_breakdown")
    if breakdown:
        print("reward_breakdown:")
        print(json.dumps(breakdown, indent=2))

    script = obs.get("script_content") or obs.get("broken_script") or ""
    if not script:
        return

    # Split once and reuse the list for both the preview and the length check.
    lines = script.splitlines()
    print("script preview :")
    for line in lines[:preview_lines]:
        print(f" | {line}")
    if len(lines) > preview_lines:
        print(" | ...")
async def main(seed: int = 42) -> None:
    """Run one reset -> breakage -> repair round trip against ``ENV_URL``.

    Args:
        seed: Seed forwarded to ``client.reset()``; defaults to 42.

    The episode is reset with ``difficulty="medium"``. The breakage step uses
    the ``target_category`` reported by the reset observation, falling back
    to ``"RenameApiCall"`` when the observation is not a dict or the field is
    missing or empty. The repair step submits an empty diff. A summary is
    printed after each call.
    """
    print(f"connecting to {ENV_URL} (seed={seed}) ...")

    # Use the client as an async context manager so the connection is released
    # even if a step raises. NOTE(review): relies on the OpenEnv client
    # supporting ``async with`` -- confirm against the installed version.
    async with GenericEnvClient(base_url=ENV_URL) as client:
        res = await client.reset(seed=seed, options={"difficulty": "medium"})
        _summary(res, "after reset()")

        reset_obs = res.observation if isinstance(res.observation, dict) else {}
        # ``or`` also covers a key that is present but None or empty, which
        # would otherwise be sent on as the primitive type.
        target = reset_obs.get("target_category") or "RenameApiCall"

        res = await client.step(GenericAction(
            breakage={"action_type": "breakage", "primitive_type": target, "params": {}},
            repair=None,
        ))
        _summary(res, "after drift step (Challenger)")

        # empty diff = no-op repair: shows the verifier marking the script as still broken
        res = await client.step(GenericAction(
            breakage=None,
            repair={"action_type": "repair", "unified_diff": ""},
        ))
        _summary(res, "after repair step (Solver, no-op)")

        print("\nOK -- reset + 2 steps round-trip the deployed env.")
if __name__ == "__main__":
    import sys

    # Default seed, overridden by the first command-line argument when given.
    seed = 42
    if len(sys.argv) >= 2:
        seed = int(sys.argv[1])

    asyncio.run(main(seed=seed))
|