Stack-2-9-finetuned / stack /eval /test_direct.py
walidsobhie-code
refactor: Squeeze folders further - cleaner structure
65888d5
#!/usr/bin/env python3
"""Test runner using direct script execution"""
import subprocess
import sys
# Absolute path of the HumanEval runner that this script launches.
# NOTE(review): the path is machine-specific; on a host where the file does
# not exist the child interpreter exits non-zero and the error shows up in
# the STDERR line below.
script = "/Users/walidsobhi/.openclaw/workspace/stack-2.9/stack-2.9-eval/human_eval.py"


def to_text(data):
    """Return captured process output as ``str``.

    ``None`` becomes ``""`` and ``bytes`` are decoded as UTF-8 with
    replacement characters. This matters on the timeout path:
    ``subprocess.TimeoutExpired`` may carry ``bytes`` (or ``None``) even
    when the child was started with ``text=True``.
    """
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode("utf-8", errors="replace")
    return data


def report_output(stdout, stderr):
    """Print stdout (first 2000 chars) and stderr (first 500 chars)."""
    out = to_text(stdout)
    err = to_text(stderr)
    print("STDOUT:", out[:2000] if out else "(empty)")
    print("STDERR:", err[:500] if err else "(empty)")


print("=" * 50)
print("Running HumanEval directly")
print("=" * 50)

# Quick smoke run: "--timeout 5" is forwarded to the eval script (presumably a
# per-problem limit -- confirm against human_eval.py), while timeout=60 caps
# the wall-clock time of the whole child process.
try:
    result = subprocess.run(
        [sys.executable, script, "--model", "test-run", "--timeout", "5"],
        capture_output=True,
        text=True,
        timeout=60,
    )
except subprocess.TimeoutExpired as exc:
    # Show whatever the child produced before it was killed, then re-raise so
    # the script still fails exactly as it did before this handler existed.
    report_output(exc.stdout, exc.stderr)
    print("Timed out after", exc.timeout, "seconds")
    print("=" * 50)
    raise

report_output(result.stdout, result.stderr)
print("Return code:", result.returncode)
print("=" * 50)