openenv-python-env / tests /test_inference_output.py
uvpatel7271's picture
Upload folder using huggingface_hub
3ec70de verified
import os
import re
import subprocess
import sys
from pathlib import Path
from tasks import task_ids
# Parent of the directory that contains this file; used below as the working
# directory when launching inference.py.
ROOT = Path(__file__).resolve().parents[1]
# "[START] task=<id>": group 1 is the task id (lowercase letters, digits, hyphens).
START_RE = re.compile(r"^\[START\] task=([a-z0-9-]+)$")
# "[STEP] step=<n> reward=<r>": group 1 is the integer step number, group 2 is
# the reward as an optionally negative decimal (no exponent notation accepted).
STEP_RE = re.compile(r"^\[STEP\] step=(\d+) reward=(-?\d+(?:\.\d+)?)$")
# "[END] task=<id> score=<s> steps=<n>": group 1 is the task id, group 2 is an
# unsigned decimal score, group 3 is the integer step count.
END_RE = re.compile(r"^\[END\] task=([a-z0-9-]+) score=(\d+(?:\.\d+)?) steps=(\d+)$")
def test_inference_emits_structured_stdout_for_all_tasks():
    """Run ``inference.py`` in a subprocess and validate its stdout protocol.

    The script is launched from ``ROOT`` with ``API_BASE_URL`` and ``HF_TOKEN``
    removed from the environment and ``MODEL_NAME`` set to ``"mock-model"``.
    The test then checks that:

    * the process exits with status 0;
    * none of the ``[START]``/``[STEP]``/``[END]`` markers appear on stderr;
    * the non-blank stdout lines form a sequence of episodes, each being one
      ``[START]`` line, one or more ``[STEP]`` lines numbered consecutively
      from 1 with rewards in ``[-1.0, 1.0]``, and one ``[END]`` line that
      repeats the task id, reports a score in ``[0.0, 1.0]`` and a ``steps``
      value equal to the number of ``[STEP]`` lines seen;
    * the task ids, in order of appearance, equal ``task_ids()``.
    """
    env = os.environ.copy()
    # NOTE(review): presumably these are removed so inference.py cannot reach
    # a real endpoint during the test - confirm against inference.py.
    env.pop("API_BASE_URL", None)
    env.pop("HF_TOKEN", None)
    env["MODEL_NAME"] = "mock-model"

    result = subprocess.run(
        [sys.executable, "inference.py"],
        cwd=ROOT,
        capture_output=True,
        text=True,
        timeout=120,
        env=env,
        check=False,
    )

    # Surface the child's stderr on failure; otherwise the cause is lost.
    assert result.returncode == 0, (
        f"inference.py exited with {result.returncode}; stderr:\n{result.stderr}"
    )
    # Structured markers belong on stdout only.
    for marker in ("[START]", "[STEP]", "[END]"):
        assert marker not in result.stderr, f"{marker} marker found on stderr"

    lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    expected_tasks = task_ids()
    seen_tasks = []
    line_index = 0

    while line_index < len(lines):
        # Every episode must open with a START line.
        start_match = START_RE.match(lines[line_index])
        assert start_match, f"Invalid START line: {lines[line_index]}"
        task_id = start_match.group(1)
        seen_tasks.append(task_id)
        line_index += 1

        # Consume the consecutive run of STEP lines, matching each line once.
        step_count = 0
        while line_index < len(lines):
            step_match = STEP_RE.match(lines[line_index])
            if step_match is None:
                break
            step_count += 1
            assert int(step_match.group(1)) == step_count, (
                f"Task {task_id}: expected step={step_count}, "
                f"got line: {lines[line_index]}"
            )
            reward = float(step_match.group(2))
            assert -1.0 <= reward <= 1.0, (
                f"Task {task_id}: reward out of range: {lines[line_index]}"
            )
            line_index += 1

        assert step_count >= 1, f"Task {task_id}: no STEP lines emitted"
        assert line_index < len(lines), "Missing END line"

        # The END line must agree with what was observed for this episode.
        end_match = END_RE.match(lines[line_index])
        assert end_match, f"Invalid END line: {lines[line_index]}"
        assert end_match.group(1) == task_id, (
            f"END task {end_match.group(1)} does not match START task {task_id}"
        )
        assert 0.0 <= float(end_match.group(2)) <= 1.0, (
            f"Task {task_id}: score out of range: {lines[line_index]}"
        )
        assert int(end_match.group(3)) == step_count, (
            f"Task {task_id}: END reports steps={end_match.group(3)}, "
            f"but {step_count} STEP lines were seen"
        )
        line_index += 1

    assert seen_tasks == expected_tasks, (
        f"Tasks seen {seen_tasks} differ from expected {expected_tasks}"
    )