#!/usr/bin/env python3
"""Test runner using direct script execution.

Launches the HumanEval script in a child Python interpreter, prints a
truncated view of its stdout/stderr together with its return code, and
exits with that same return code so callers can detect failure.

Usage:
    test_runner.py [PATH_TO_HUMAN_EVAL_SCRIPT]
"""
import subprocess
import sys

# Default location of the evaluation script. It is machine-specific, so a
# different path may be given as the first command-line argument.
DEFAULT_SCRIPT = "/Users/walidsobhi/.openclaw/workspace/stack-2.9/stack-2.9-eval/human_eval.py"

# Wall-clock limit (seconds) for the whole child process.
RUN_TIMEOUT_SECONDS = 60
# Maximum number of characters of each stream echoed to the console.
STDOUT_PREVIEW_CHARS = 2000
STDERR_PREVIEW_CHARS = 500
# Exit status reported when the child is killed for exceeding the limit
# (the same value the GNU `timeout` utility uses).
TIMEOUT_EXIT_CODE = 124
BANNER = "=" * 50


def preview(stream, limit: int) -> str:
    """Return at most *limit* characters of *stream*, or ``"(empty)"``.

    *stream* may be ``str``, ``bytes`` or ``None``. Bytes are accepted
    because ``subprocess.TimeoutExpired`` exposes captured output as bytes
    even when the child was started with ``text=True``.
    """
    if not stream:
        return "(empty)"
    if isinstance(stream, bytes):
        # Partial output of a killed child may end mid-character.
        stream = stream.decode(errors="replace")
    return stream[:limit]


def run_human_eval(script: str = DEFAULT_SCRIPT, *,
                   run_timeout: float = RUN_TIMEOUT_SECONDS) -> int:
    """Run *script* in a child interpreter and print a summary.

    The child is invoked as ``<python> script --model test-run --timeout 5``
    with both output streams captured.

    Args:
        script: Path of the evaluation script to execute.
        run_timeout: Seconds to wait before the child is killed.

    Returns:
        The child's return code, or ``TIMEOUT_EXIT_CODE`` when the child
        did not finish within *run_timeout* seconds.
    """
    print(BANNER)
    print("Running HumanEval directly")
    print(BANNER)

    # NOTE(review): the original comment said this runs a limited set of
    # problems for a quick test, but no problem-limit flag is passed here;
    # only --model and --timeout are. Confirm against human_eval.py's CLI.
    command = [sys.executable, script, "--model", "test-run", "--timeout", "5"]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=run_timeout,
        )
    except subprocess.TimeoutExpired as exc:
        # subprocess.run() has already killed the child at this point; show
        # whatever it managed to write instead of dying with a traceback.
        print(f"TIMEOUT: no result after {run_timeout} seconds; child was killed")
        stdout, stderr, returncode = exc.stdout, exc.stderr, TIMEOUT_EXIT_CODE
    else:
        stdout, stderr, returncode = result.stdout, result.stderr, result.returncode

    print("STDOUT:", preview(stdout, STDOUT_PREVIEW_CHARS))
    print("STDERR:", preview(stderr, STDERR_PREVIEW_CHARS))
    print("Return code:", returncode)
    print(BANNER)
    return returncode


def main(argv: "list[str] | None" = None) -> int:
    """Entry point: run the evaluation and return the process exit status.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first item, if present, overrides
            ``DEFAULT_SCRIPT``.
    """
    args = sys.argv[1:] if argv is None else argv
    script = args[0] if args else DEFAULT_SCRIPT
    return run_human_eval(script)


if __name__ == "__main__":
    sys.exit(main())