# agent-zero-training-scripts / eval_baseline_v2.py
# Uploaded by wheattoast11 via huggingface_hub (commit b87b103, verified)
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "lighteval[vllm]>=0.6.0",
# "torch>=2.0.0",
# "transformers>=4.40.0",
# "accelerate>=0.30.0",
# "vllm>=0.4.0",
# ]
# ///
"""Evaluate baseline LiquidAI/LFM2.5-1.2B-Instruct with lighteval vllm."""
import subprocess
import sys

# Defaults mirror the original hard-coded values; both can be overridden
# on the command line: eval_baseline_v2.py [MODEL] [TASKS]
DEFAULT_MODEL = "LiquidAI/LFM2.5-1.2B-Instruct"
DEFAULT_TASKS = "leaderboard|mmlu|5,leaderboard|arc:challenge|25,leaderboard|truthfulqa:mc|0"


def build_eval_command(model=DEFAULT_MODEL, tasks=DEFAULT_TASKS, output_dir="./eval_baseline"):
    """Return the `lighteval vllm` argv for *model* evaluated on *tasks*.

    Kept as a pure function so command construction is testable without
    actually launching an evaluation.
    """
    return [
        "lighteval", "vllm", model, tasks,
        "--trust-remote-code", "--use-chat-template",
        "--output-dir", output_dir,
    ]


def main(argv=None):
    """Print lighteval's vllm help, then run the baseline evaluation.

    Args:
        argv: optional list of CLI overrides ``[MODEL, TASKS]``; defaults
            to ``sys.argv[1:]``.

    Returns:
        The evaluation subprocess's exit code, or 127 when the
        ``lighteval`` executable is not on PATH.
    """
    args = sys.argv[1:] if argv is None else argv
    model = args[0] if len(args) >= 1 else DEFAULT_MODEL
    tasks = args[1] if len(args) >= 2 else DEFAULT_TASKS

    # First check lighteval vllm --help to see valid options.
    try:
        help_result = subprocess.run(
            ["lighteval", "vllm", "--help"],
            capture_output=True, text=True,
        )
    except FileNotFoundError:
        # Binary missing: report clearly and use the conventional
        # "command not found" exit code instead of a raw traceback.
        print("error: 'lighteval' executable not found on PATH", file=sys.stderr)
        return 127
    print("=== lighteval vllm --help ===")
    print(help_result.stdout)
    print(help_result.stderr)

    # Try running with minimal args.
    cmd = build_eval_command(model, tasks)
    print(f"\n=== Running: {' '.join(cmd)} ===")
    return subprocess.run(cmd).returncode


if __name__ == "__main__":
    # Guard so importing this module (e.g. for testing) has no side effects.
    sys.exit(main())