# Source: dataset-builder / data3 / vllm_qwen_batch.py
# Uploaded by DouDou — commit 4416f19 (verified):
#   "Upload data3/vllm_qwen_batch.py with huggingface_hub"
from openai import OpenAI
# Shared API client; every request in this script goes to the endpoint on
# localhost port 8000 (presumably a local vLLM server, going by the file
# name — confirm).
# NOTE(review): "none" looks like a placeholder key for a server that does
# not check credentials — confirm against the server configuration.
client = OpenAI(api_key="none", base_url="http://localhost:8000/v1")
# Demo prompts (Chinese), each sent to the model as its own request.
prompts = [
    # "Explain why large models need quantization."
    "解释一下为什么大模型需要量化。",
    # "List three commonly used learning-rate scheduling methods."
    "列出三种常用的学习率调度方法。",
    # "Write a short sci-fi paragraph about a future city."
    "写一个关于未来城市的科幻短段落。",
]
def batch_chat(prompts, *, model="Qwen/Qwen3-30B-A3B-GPTQ-Int4", max_tokens=256):
    """Send each prompt as a separate single-turn chat request and collect the replies.

    Requests are issued one after another, in input order, through the
    module-level ``client``. Any exception raised by the client propagates
    to the caller unchanged.

    Args:
        prompts: Iterable of strings; each becomes the content of a lone
            ``user`` message.
        model: Model name forwarded to the endpoint. Defaults to the name
            that was previously hard-coded here.
        max_tokens: Completion-length cap forwarded with every request.
            Defaults to the previously hard-coded 256.

    Returns:
        A list holding ``choices[0].message.content`` of each response, in
        the same order as ``prompts``. Empty input yields an empty list and
        no request is made.
    """
    return [
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        ).choices[0].message.content
        for prompt in prompts
    ]
# Run every demo prompt through the model, then print each prompt next to
# the reply it produced (numbered from 1).
results = batch_chat(prompts)
for i, (prompt, reply) in enumerate(zip(prompts, results)):
    print(f"\n=== Prompt {i+1} ===")
    print("Prompt:", prompt)
    print("Response:", reply)