Spaces:

agentDebugger
/

AgentDebugger-training-v3

Running

shank

Add Gradio training monitor and fix subprocess python path

b92ad01 11 days ago

1.85 kB

	import json
	import subprocess
	import tempfile
	import os
	import sys

	def test_passes(code: str, func: str, inp: object, expected: object) -> bool:
	    """Run one test case against ``code`` in a fresh interpreter.

	    A throwaway script is generated that executes ``code``, calls ``func``
	    and prints ``PASS``, ``FAIL: got ...`` or ``ERROR: ...``. The script is
	    run with ``sys.executable`` under a 5-second timeout.

	    Args:
	        code: Python source that defines ``func``.
	        func: Name of the function to call.
	        inp: Call argument(s). A list or tuple is spread into positional
	            arguments; any other value is passed as the single argument.
	        expected: Value the call must return. It is embedded in the script
	            through ``repr``, as are the arguments.

	    Returns:
	        True only when the last line written to stdout is exactly ``PASS``.
	        False for a wrong result, an exception raised by the code under
	        test, a timeout, or a failure to create or launch the script.
	    """
	    if isinstance(inp, (list, tuple)):
	        args = ', '.join(repr(x) for x in inp)
	    else:
	        args = repr(inp)

	    # Assembled line by line so the generated script's indentation never
	    # depends on how this source file itself is indented.
	    script = '\n'.join([
	        code,
	        '',
	        'try:',
	        f'    r = {func}({args})',
	        f'    expected = {expected!r}',
	        '    print("PASS" if r == expected else f"FAIL: got {r}")',
	        'except Exception as e:',
	        '    print(f"ERROR: {e}")',
	        '',
	    ])

	    fname = None
	    try:
	        # UTF-8 matches the encoding the interpreter assumes for source files.
	        with tempfile.NamedTemporaryFile(
	            mode='w', suffix='.py', delete=False, encoding='utf-8'
	        ) as f:
	            fname = f.name
	            f.write(script)
	        r = subprocess.run(
	            [sys.executable, fname],
	            capture_output=True, text=True, timeout=5,
	        )
	    except (OSError, subprocess.SubprocessError, UnicodeError):
	        # Timeouts, launch failures and undecodable output all count as a
	        # failed test rather than crashing the caller.
	        return False
	    finally:
	        # Runs on every path, including timeouts, so no script is left behind.
	        if fname is not None:
	            try:
	                os.unlink(fname)
	            except OSError:
	                pass  # best-effort cleanup; the verdict does not depend on it

	    # The verdict is the final line. A substring search would treat output
	    # such as "FAIL: got PASSWORD" or "ERROR: PASS" as a pass.
	    lines = r.stdout.splitlines()
	    return bool(lines) and lines[-1] == 'PASS'


	# A helper despite its ``test_`` prefix: keep pytest from collecting it when
	# this module's names end up in a test module.
	test_passes.__test__ = False

	# Sanity-check each tier's bug dataset: the reference implementation must
	# pass every test case, and the buggy variant must fail at least one.
	for tier in [1, 2, 3]:
	    # The context manager closes the file promptly; UTF-8 is stated
	    # explicitly so decoding does not depend on the platform locale.
	    with open(f'data/bugs_tier{tier}.jsonl', encoding='utf-8') as dataset:
	        bugs = [json.loads(line) for line in dataset if line.strip()]

	    broken_original = []    # ids whose original_code fails some test case
	    buggy_not_failing = []  # ids whose buggy_code passes every test case

	    for b in bugs:
	        name = b['function_name']
	        cases = b['test_cases']

	        # all()/any() short-circuit, so remaining cases are not executed
	        # once the outcome for this bug is decided.
	        orig_passes = all(
	            test_passes(b['original_code'], name,
	                        t['input'], t['expected_output'])
	            for t in cases
	        )
	        buggy_fails_some = any(
	            not test_passes(b['buggy_code'], name,
	                            t['input'], t['expected_output'])
	            for t in cases
	        )

	        if not orig_passes:
	            broken_original.append(b['id'])
	        if not buggy_fails_some:
	            buggy_not_failing.append(b['id'])

	    print(f'\nTier {tier}:')
	    if broken_original:
	        print(f' BROKEN original_code: {broken_original}')
	    if buggy_not_failing:
	        print(f' BUGGY code not failing: {buggy_not_failing}')
	    if not broken_original and not buggy_not_failing:
	        print(' All good!')