walidsobhie-code

refactor: Squeeze folders further - cleaner structure

65888d5 22 days ago

752 Bytes

	#!/usr/bin/env python3
	"""
	Run HumanEval benchmark - fixed path version.

	The eval directory defaults to the location this script was originally
	written for and can be overridden with the STACK_EVAL_DIR environment
	variable, so the script is usable on machines where that absolute path
	does not exist.
	"""

	import sys
	import os

	# Original machine-specific location, kept as the default so existing
	# invocations behave exactly as before.
	DEFAULT_EVAL_DIR = "/Users/walidsobhi/.openclaw/workspace/stack-2.9/stack-2.9-eval"

	# Environment override wins; an unset or empty variable falls back to the default.
	eval_dir = os.environ.get("STACK_EVAL_DIR") or DEFAULT_EVAL_DIR
	if not os.path.isdir(eval_dir):
	    # Fail early with a hint instead of the bare os.chdir() error.
	    raise FileNotFoundError(
	        f"Eval directory not found: {eval_dir!r}. "
	        "Set the STACK_EVAL_DIR environment variable to the stack-2.9-eval directory."
	    )

	# Add the eval directory to the import path so `benchmarks` can be imported,
	# and make it the working directory (presumably so relative paths used by the
	# benchmark resolve there - confirm against benchmarks/human_eval).
	sys.path.insert(0, eval_dir)
	os.chdir(eval_dir)

	# Must come after the sys.path change above, hence not at the top of the file.
	from benchmarks.human_eval import HumanEval  # noqa: E402

	print("=" * 50)
	print("Running HumanEval Benchmark (Stub Mode)")
	print("=" * 50)

	# Limit the run to 5 problems.
	# NOTE(review): the banner says "Stub Mode", but nothing here selects a stub
	# explicitly - presumably it is HumanEval's default; confirm.
	benchmark = HumanEval(max_problems=5)
	results = benchmark.evaluate()

	# Report the summary fields returned by evaluate().
	# NOTE(review): 'pass_at_1' is printed under the label "Passed"; verify whether
	# it is a count or a rate. 'accuracy' is scaled by 100 for display, so it is
	# treated here as a fraction.
	print("\n" + "=" * 50)
	print("RESULTS:")
	print("=" * 50)
	print(f"Total: {results['total_cases']}")
	print(f"Passed: {results['pass_at_1']}")
	print(f"Accuracy: {results['accuracy']*100:.1f}%")
	print(f"Model: {results['model']}")
	print("=" * 50)