# shank
# Add Gradio training monitor and fix subprocess python path
# b92ad01
import json
import subprocess
import tempfile
import os
import sys
def test_passes(code, func, inp, expected):
    """Run one test case against ``code`` in a fresh interpreter.

    A throwaway script is generated that defines ``code``, calls ``func``
    with ``inp`` and compares the result with ``expected``.  The script is
    executed with the current interpreter (``sys.executable``) under a
    5-second timeout.

    Args:
        code: Python source that defines ``func``.
        func: Name of the function to call.
        inp: Call arguments.  A list or tuple is spread into positional
            arguments; any other value is passed as the single argument.
        expected: Value the call must return (compared with ``==``).

    Returns:
        True only if the script's final output line is exactly ``PASS``.
        False on a wrong result, an exception in the tested code, a
        timeout, or a failure to write or launch the script.
    """
    if isinstance(inp, (list, tuple)):
        args = ', '.join(repr(x) for x in inp)
    else:
        args = repr(inp)
    # The verdict is printed last; values are shown with !r so that a
    # multi-line result or message cannot put a bare "PASS" on its own line.
    script = f"""{code}
try:
    r = {func}({args})
    expected = {repr(expected)}
    print("PASS" if r == expected else f"FAIL: got {{r!r}}")
except Exception as e:
    print(f"ERROR: {{e!r}}")
"""
    fname = None
    try:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
            f.write(script)
            fname = f.name
        r = subprocess.run(
            [sys.executable, fname],
            capture_output=True, text=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError, UnicodeError):
        # Could not write/launch the script, it timed out, or its output
        # could not be encoded/decoded: count the case as not passing.
        return False
    finally:
        # Remove the script on every path, including timeouts.
        if fname is not None:
            try:
                os.unlink(fname)
            except OSError:
                pass
    # Only the final line is the verdict.  A substring search would accept
    # "FAIL: got PASS" or any "PASS" printed by the code under test.
    out_lines = r.stdout.splitlines()
    return bool(out_lines) and out_lines[-1] == 'PASS'


# Helper, not a pytest test: without this opt-out the ``test_`` prefix makes
# pytest collect it and fail looking for fixtures named after its parameters.
test_passes.__test__ = False
# Sanity-check each tier's bug dataset: for every record the original code
# must pass all of its test cases and the buggy code must fail at least one.
for tier in [1, 2, 3]:
    # Read the JSON Lines file inside a context manager so the handle is
    # closed before the (slow) subprocess-based checks start.
    with open(f'data/bugs_tier{tier}.jsonl', encoding='utf-8') as fh:
        bugs = [json.loads(line) for line in fh if line.strip()]

    broken_original = []    # ids whose original_code fails some test case
    buggy_not_failing = []  # ids whose buggy_code passes every test case

    for b in bugs:
        orig_passes = all(
            test_passes(b['original_code'], b['function_name'],
                        t['input'], t['expected_output'])
            for t in b['test_cases']
        )
        buggy_fails_some = any(
            not test_passes(b['buggy_code'], b['function_name'],
                            t['input'], t['expected_output'])
            for t in b['test_cases']
        )
        if not orig_passes:
            broken_original.append(b['id'])
        if not buggy_fails_some:
            buggy_not_failing.append(b['id'])

    print(f'\nTier {tier}:')
    if broken_original:
        print(f' BROKEN original_code: {broken_original}')
    if buggy_not_failing:
        print(f' BUGGY code not failing: {buggy_not_failing}')
    if not broken_original and not buggy_not_failing:
        print(' All good!')