#!/usr/bin/env bash
#
# Stack 2.9 HumanEval quick evaluation.
#
# Prints a few advisory pre-flight checks, then runs the
# `benchmarks.human_eval` Python module on a limited number of problems.
#
# Usage:
#   <this-script> [PROVIDER] [MODEL] [MAX_PROBLEMS]
#
#   PROVIDER      forwarded as --provider      (default: ollama)
#                 pre-flight checks exist for: ollama, openai, anthropic
#   MODEL         forwarded as --model         (default: qwen2.5-coder:32b)
#   MAX_PROBLEMS  forwarded as --max-problems  (default: 20)
#
# Environment:
#   OPENAI_API_KEY     checked for presence only, when PROVIDER=openai
#   ANTHROPIC_API_KEY  checked for presence only, when PROVIDER=anthropic
#
# Exit status:
#   Non-zero if changing into the script directory fails or the Python
#   module exits non-zero (via `set -e`). Pre-flight checks never abort.

set -euo pipefail

# Distinct markers so a passed check can be told apart from a warning
# at a glance.
readonly OK_MARK='✓'
readonly WARN_MARK='⚠'

# Endpoint probed to decide whether a local Ollama server is answering.
readonly OLLAMA_TAGS_URL='http://localhost:11434/api/tags'

# Print a "check passed" status line on stdout.
report_ok() {
  printf '%s %s\n' "$OK_MARK" "$*"
}

# Print a "warning" status line on stdout. Kept on stdout (not stderr) so
# the pre-flight report stays in one stream, in order.
report_warn() {
  printf '%s %s\n' "$WARN_MARK" "$*"
}

#######################################
# Print advisory pre-flight checks for the selected provider.
# Nothing here aborts the run; problems are only reported.
# Globals:   OLLAMA_TAGS_URL, OPENAI_API_KEY, ANTHROPIC_API_KEY (read)
# Arguments: $1 - provider name
# Outputs:   status lines on stdout
#######################################
check_provider() {
  local provider=$1

  case "$provider" in
    ollama)
      if ! command -v ollama >/dev/null 2>&1; then
        report_warn "Ollama not installed - will attempt anyway"
      else
        report_ok "Ollama available"
        # --max-time bounds the probe so an unresponsive listener on the
        # port cannot stall the script indefinitely.
        if curl -s --max-time 5 "$OLLAMA_TAGS_URL" >/dev/null 2>&1; then
          report_ok "Ollama server running"
        else
          report_warn "Ollama server not running - start with: ollama serve"
        fi
      fi
      ;;
    openai)
      # ${VAR:-} keeps `set -u` from aborting when the key is unset.
      if [[ -z "${OPENAI_API_KEY:-}" ]]; then
        report_warn "OPENAI_API_KEY not set"
      else
        report_ok "OpenAI API key configured"
      fi
      ;;
    anthropic)
      if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
        report_warn "ANTHROPIC_API_KEY not set"
      else
        report_ok "Anthropic API key configured"
      fi
      ;;
    *)
      # The provider is still forwarded to the Python module unchanged;
      # this script simply has nothing to verify for it.
      report_warn "No pre-flight check for provider '$provider' - continuing"
      ;;
  esac
}

#######################################
# Entry point: parse arguments, report the environment, run the benchmark.
# Arguments: $1 - provider (optional), $2 - model (optional),
#            $3 - max problems (optional)
# Outputs:   banner, pre-flight report and follow-up hints on stdout
# Returns:   exits non-zero (set -e) if the evaluation command fails
#######################################
main() {
  local provider="${1:-ollama}"
  local model="${2:-qwen2.5-coder:32b}"
  local max_problems="${3:-20}"

  # `python3 -m` looks up the `benchmarks` package starting from the
  # current directory, so run from wherever this script lives.
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  cd "$script_dir"

  echo "========================================"
  echo "Stack 2.9 HumanEval Quick Evaluation"
  echo "========================================"
  printf 'Provider: %s\n' "$provider"
  printf 'Model: %s\n' "$model"
  printf 'Problems: %s\n' "$max_problems"
  echo ""

  # Build the command as an array so every argument stays one word and
  # the optional flag can be appended without unquoted expansion.
  local -a eval_cmd=(
    python3 -m benchmarks.human_eval
    --provider "$provider"
    --model "$model"
    --max-problems "$max_problems"
    --timeout 30
  )

  if command -v vllm >/dev/null 2>&1; then
    eval_cmd+=(--use-vllm)
    report_ok "vLLM detected - will use for faster inference"
  else
    report_warn "vLLM not found - using standard inference"
  fi

  check_provider "$provider"

  echo ""
  echo "Running evaluation..."
  echo "----------------------------------------"

  "${eval_cmd[@]}"

  # Reached only when the evaluation command exited zero (set -e).
  echo ""
  echo "========================================"
  echo "Evaluation complete!"
  echo "========================================"
  echo ""
  # NOTE(review): this path is only announced here; the write itself is
  # presumably done by benchmarks.human_eval - confirm it matches.
  echo "Results saved to: results/humaneval.json"
  echo ""
  echo "To run full 164-problem benchmark:"
  echo " 1. Download full HumanEval dataset"
  echo " 2. Use GPU with 80GB VRAM (A100/H100)"
  echo " 3. See HUMAN_EVAL_PLAN.md for details"
}

main "$@"