#!/usr/bin/env python3
"""Test runner using direct script execution.

Launches the HumanEval script in a child Python interpreter, prints a
truncated view of its captured stdout/stderr, and exits with the child's
return code so callers can tell whether the run succeeded.
"""
import subprocess
import sys

script = "/Users/walidsobhi/.openclaw/workspace/stack-2.9/stack-2.9-eval/human_eval.py"

# Wall-clock limit, in seconds, for the whole child process.
RUN_TIMEOUT_SECONDS = 60

# Exit status reported when the child exceeds RUN_TIMEOUT_SECONDS.
# 124 mirrors the convention used by the GNU `timeout` utility.
TIMEOUT_EXIT_CODE = 124

# Maximum number of characters of each stream echoed to the console.
STDOUT_PREVIEW_CHARS = 2000
STDERR_PREVIEW_CHARS = 500


def preview(text, limit):
    """Return at most *limit* characters of *text*, or "(empty)".

    Args:
        text: Captured stream content. May be ``None`` or ``bytes`` when it
            comes from a ``subprocess.TimeoutExpired`` exception, whose
            partial output is not always decoded.
        limit: Maximum number of characters to keep.

    Returns:
        The truncated text, or the literal string "(empty)" when there is
        nothing to show.
    """
    if not text:
        return "(empty)"
    if isinstance(text, bytes):
        text = text.decode("utf-8", errors="replace")
    return text[:limit]


def main():
    """Run the HumanEval script once and report its output.

    Returns:
        The child's return code, or ``TIMEOUT_EXIT_CODE`` if the child did
        not finish within ``RUN_TIMEOUT_SECONDS``.
    """
    print("="*50)
    print("Running HumanEval directly")
    print("="*50)

    # Quick smoke run: "--timeout 5" is forwarded to the script itself,
    # while RUN_TIMEOUT_SECONDS bounds the child process as a whole.
    command = [sys.executable, script, "--model", "test-run", "--timeout", "5"]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT_SECONDS,
        )
    except subprocess.TimeoutExpired as exc:
        # Show whatever was captured before the child was killed instead of
        # dying with a traceback and losing the output.
        print("STDOUT:", preview(exc.stdout, STDOUT_PREVIEW_CHARS))
        print("STDERR:", preview(exc.stderr, STDERR_PREVIEW_CHARS))
        print(f"Timed out after {RUN_TIMEOUT_SECONDS} seconds")
        print("="*50)
        return TIMEOUT_EXIT_CODE

    print("STDOUT:", preview(result.stdout, STDOUT_PREVIEW_CHARS))
    print("STDERR:", preview(result.stderr, STDERR_PREVIEW_CHARS))
    print("Return code:", result.returncode)
    print("="*50)
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())