# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "lighteval[vllm]>=0.6.0",
#     "torch>=2.0.0",
#     "transformers>=4.40.0",
#     "accelerate>=0.30.0",
#     "vllm>=0.4.0",
# ]
# ///
"""Evaluate baseline LiquidAI/LFM2.5-1.2B-Instruct with lighteval vllm."""

import subprocess
import sys

# Model under evaluation and the lighteval task spec
# (format: suite|task|num_fewshot, comma-separated).
MODEL = "LiquidAI/LFM2.5-1.2B-Instruct"
TASKS = "leaderboard|mmlu|5,leaderboard|arc:challenge|25,leaderboard|truthfulqa:mc|0"


def print_vllm_help() -> None:
    """Print `lighteval vllm --help` so the log records the valid CLI options.

    Captures both stdout and stderr and echoes them, since lighteval/typer
    may write the usage text to either stream.
    """
    help_result = subprocess.run(
        ["lighteval", "vllm", "--help"], capture_output=True, text=True
    )
    print("=== lighteval vllm --help ===")
    print(help_result.stdout)
    print(help_result.stderr)


def run_eval() -> int:
    """Run the baseline evaluation and return the lighteval exit code.

    Output streams are intentionally NOT captured so lighteval's progress
    and results stream directly to the console.
    """
    cmd = [
        "lighteval",
        "vllm",
        MODEL,
        TASKS,
        "--trust-remote-code",
        "--use-chat-template",
        "--output-dir",
        "./eval_baseline",
    ]
    print(f"\n=== Running: {' '.join(cmd)} ===")
    return subprocess.run(cmd).returncode


def main() -> None:
    """Show the CLI help, run the evaluation, and exit with its status."""
    print_vllm_help()
    sys.exit(run_eval())


# Guard the entry point so importing this module has no side effects
# (the original ran subprocesses and called sys.exit at import time).
if __name__ == "__main__":
    main()