Spaces:

uvpatel7271
/

openenv-python-env

Sleeping

App Files Files Community

openenv-python-env / tests /test_inference_output.py

uvpatel7271

Upload folder using huggingface_hub

3ec70de verified 25 days ago

raw

history blame contribute delete

2.16 kB

	import os
	import re
	import subprocess
	import sys
	from pathlib import Path

	from tasks import task_ids


	# Directory one level above the folder containing this file; used as the
	# working directory when launching ``inference.py``.
	ROOT = Path(__file__).resolve().parents[1]
	# "[START] task=<id>" -- <id> is lowercase letters, digits and hyphens.
	START_RE = re.compile(r"^\[START\] task=([a-z0-9-]+)$")
	# "[STEP] step=<int> reward=<number>" -- reward may be negative and may
	# carry a fractional part.
	STEP_RE = re.compile(r"^\[STEP\] step=(\d+) reward=(-?\d+(?:\.\d+)?)$")
	# "[END] task=<id> score=<number> steps=<int>" -- score has no sign, so a
	# negative score does not match.
	END_RE = re.compile(r"^\[END\] task=([a-z0-9-]+) score=(\d+(?:\.\d+)?) steps=(\d+)$")


	def test_inference_emits_structured_stdout_for_all_tasks():
	    """Run ``inference.py`` and validate the structured log it prints.

	    The script is started in a subprocess from ``ROOT`` with ``API_BASE_URL``
	    and ``HF_TOKEN`` removed from the environment and ``MODEL_NAME`` set to
	    ``"mock-model"``.

	    The test passes when:

	    * the process exits with status 0;
	    * none of the ``[START]``/``[STEP]``/``[END]`` markers appear on stderr;
	    * the non-blank stdout lines form a sequence of episodes, each made of
	      one ``[START]`` line, one or more ``[STEP]`` lines numbered 1, 2, ...
	      with a reward in ``[-1.0, 1.0]``, and one ``[END]`` line that repeats
	      the task id, reports a score in ``[0.0, 1.0]`` and a ``steps`` value
	      equal to the number of ``[STEP]`` lines seen;
	    * the task ids, in order of appearance, equal ``task_ids()``.
	    """
	    env = os.environ.copy()
	    # NOTE(review): presumably dropping the endpoint/token makes the script
	    # run without a remote model -- confirm against inference.py.
	    env.pop("API_BASE_URL", None)
	    env.pop("HF_TOKEN", None)
	    env["MODEL_NAME"] = "mock-model"

	    result = subprocess.run(
	        [sys.executable, "inference.py"],
	        cwd=ROOT,
	        capture_output=True,
	        text=True,
	        timeout=120,
	        env=env,
	        check=False,  # the exit status is asserted below with diagnostics
	    )

	    assert result.returncode == 0, (
	        f"inference.py exited with code {result.returncode}; "
	        f"stderr:\n{result.stderr}"
	    )
	    # The structured markers belong on stdout only.
	    for marker in ("[START]", "[STEP]", "[END]"):
	        assert marker not in result.stderr, f"{marker} found on stderr"

	    # Blank lines are ignored; surrounding whitespace is stripped before
	    # matching because the patterns are anchored with ^ and $.
	    lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
	    expected_tasks = task_ids()
	    seen_tasks = []
	    line_index = 0

	    while line_index < len(lines):
	        # Every episode must open with a START line.
	        start_match = START_RE.match(lines[line_index])
	        assert start_match, f"Invalid START line: {lines[line_index]}"
	        task_id = start_match.group(1)
	        seen_tasks.append(task_id)
	        line_index += 1

	        # Consume the run of STEP lines that follows, matching each once.
	        step_count = 0
	        while line_index < len(lines):
	            step_match = STEP_RE.match(lines[line_index])
	            if step_match is None:
	                break
	            step_count += 1
	            assert int(step_match.group(1)) == step_count, (
	                f"Out-of-order STEP line: {lines[line_index]}"
	            )
	            reward = float(step_match.group(2))
	            assert -1.0 <= reward <= 1.0, (
	                f"Reward out of range: {lines[line_index]}"
	            )
	            line_index += 1

	        assert step_count >= 1, f"No STEP lines for task {task_id}"
	        assert line_index < len(lines), "Missing END line"

	        # The END line must agree with what was observed for this episode.
	        end_match = END_RE.match(lines[line_index])
	        assert end_match, f"Invalid END line: {lines[line_index]}"
	        assert end_match.group(1) == task_id, (
	            f"END task does not match START task {task_id}: {lines[line_index]}"
	        )
	        assert 0.0 <= float(end_match.group(2)) <= 1.0, (
	            f"Score out of range: {lines[line_index]}"
	        )
	        assert int(end_match.group(3)) == step_count, (
	            f"END reports wrong step count (saw {step_count}): {lines[line_index]}"
	        )
	        line_index += 1

	    assert seen_tasks == expected_tasks