#!/bin/bash
# Stack 2.9 Quick HumanEval Evaluation Wrapper
#
# Usage:   ./quick_human_eval.sh [provider] [model] [max_problems]
# Example: ./quick_human_eval.sh ollama qwen2.5-coder:32b 20
#
# Every argument is optional; omitted ones fall back to the defaults
# assigned further down in this script.

# Stop at the first command that exits with a non-zero status.
set -o errexit

# Resolve the absolute directory that contains this script, then make it
# the working directory so the rest of the script runs from there no
# matter where it was invoked.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
cd "$SCRIPT_DIR"
# Positional arguments; each one falls back to its default when it is
# omitted or passed as an empty string.
PROVIDER="${1:-ollama}"            # $1 - provider name
MODEL="${2:-qwen2.5-coder:32b}"    # $2 - model identifier
MAX_PROBLEMS="${3:-20}"            # $3 - number of problems to run

# Print the header followed by the effective settings and a blank line.
banner_rule="========================================"
printf '%s\n' \
  "$banner_rule" \
  "Stack 2.9 HumanEval Quick Evaluation" \
  "$banner_rule" \
  "Provider: $PROVIDER" \
  "Model: $MODEL" \
  "Problems: $MAX_PROBLEMS" \
  ""
# Look for a `vllm` executable on PATH. USE_VLLM ends up holding the
# `--use-vllm` flag when one is found and stays empty otherwise.
USE_VLLM=""
if ! command -v vllm >/dev/null 2>&1; then
  echo "⚠ vLLM not found - using standard inference"
else
  USE_VLLM="--use-vllm"
  echo "✓ vLLM detected - will use for faster inference"
fi
#######################################
# Advisory pre-flight check for the selected provider.
# Nothing here is fatal: each finding is printed and the function always
# returns 0, so the evaluation is attempted regardless of the outcome.
# Globals:
#   OPENAI_API_KEY, ANTHROPIC_API_KEY (read; may be unset)
# Arguments:
#   $1 - provider name (ollama, openai, anthropic, or anything else)
# Outputs:
#   Status lines on stdout
# Returns:
#   0
#######################################
check_provider() {
  local provider="${1:-}"

  case "$provider" in
    ollama)
      if ! command -v ollama >/dev/null 2>&1; then
        echo "⚠ Ollama not installed - will attempt anyway"
        return 0
      fi
      echo "✓ Ollama available"

      # Probe the local Ollama HTTP endpoint to see whether a server answers.
      if ! command -v curl >/dev/null 2>&1; then
        # Without curl the probe cannot run; do not misreport that as
        # "server not running".
        echo "⚠ curl not found - cannot check whether Ollama server is running"
      elif curl -sf --max-time 5 http://localhost:11434/api/tags >/dev/null 2>&1; then
        # -f: an HTTP error status counts as failure.
        # --max-time: a listener that never answers cannot hang the script.
        echo "✓ Ollama server running"
      else
        echo "⚠ Ollama server not running - start with: ollama serve"
      fi
      ;;
    openai)
      # ${VAR:-} keeps the check safe even when the shell runs with `set -u`.
      if [[ -z "${OPENAI_API_KEY:-}" ]]; then
        echo "⚠ OPENAI_API_KEY not set"
      else
        echo "✓ OpenAI API key configured"
      fi
      ;;
    anthropic)
      if [[ -z "${ANTHROPIC_API_KEY:-}" ]]; then
        echo "⚠ ANTHROPIC_API_KEY not set"
      else
        echo "✓ Anthropic API key configured"
      fi
      ;;
    *)
      # Make it visible that no check ran, so a mistyped provider name does
      # not pass silently.
      echo "⚠ No availability check for provider '$provider' - will attempt anyway"
      ;;
  esac

  return 0
}

check_provider "$PROVIDER"
#######################################
# Run the benchmarks.human_eval Python module with the chosen settings.
# The command line is assembled as an array so every value reaches python3
# as exactly one argument, and the optional flag is added only when it is
# non-empty instead of relying on unquoted word-splitting.
# Arguments:
#   $1 - provider name      (passed as --provider)
#   $2 - model identifier   (passed as --model)
#   $3 - problem count      (passed as --max-problems)
#   $4 - optional extra flag such as --use-vllm; skipped when empty
# Returns:
#   Exit status of the python3 invocation
#######################################
run_evaluation() {
  local provider="${1:-}"
  local model="${2:-}"
  local max_problems="${3:-}"
  local vllm_flag="${4:-}"

  local -a eval_args=(
    --provider "$provider"
    --model "$model"
    --max-problems "$max_problems"
    --timeout 30
  )
  if [[ -n "$vllm_flag" ]]; then
    eval_args+=("$vllm_flag")
  fi

  python3 -m benchmarks.human_eval "${eval_args[@]}"
}

echo ""
echo "Running evaluation..."
echo "----------------------------------------"

# Run the evaluation
run_evaluation "$PROVIDER" "$MODEL" "$MAX_PROBLEMS" "$USE_VLLM"
#######################################
# Print the closing banner, the results location, and the pointers for
# running the full benchmark.
# Arguments:
#   None
# Outputs:
#   Eleven lines on stdout, starting with a blank separator line
#######################################
print_completion_summary() {
  local rule="========================================"

  printf '%s\n' \
    "" \
    "$rule" \
    "Evaluation complete!" \
    "$rule" \
    "" \
    "Results saved to: results/humaneval.json" \
    "" \
    "To run full 164-problem benchmark:" \
    " 1. Download full HumanEval dataset" \
    " 2. Use GPU with 80GB VRAM (A100/H100)" \
    " 3. See HUMAN_EVAL_PLAN.md for details"
}

print_completion_summary