from openai import OpenAI

# Client pointed at a local OpenAI-compatible endpoint (e.g. vLLM / llama.cpp
# server). Such servers typically ignore the API key, so a placeholder is fine.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="none",
)

prompts = [
    "解释一下为什么大模型需要量化。",
    "列出三种常用的学习率调度方法。",
    "写一个关于未来城市的科幻短段落。",
]


def batch_chat(prompts: list[str]) -> list[str]:
    """Send each prompt as an independent single-turn chat and collect replies.

    Requests are issued sequentially against the module-level ``client``;
    the work is I/O-bound, so a thread pool could overlap the calls for
    large batches — kept sequential here for simplicity.

    Args:
        prompts: User messages; one chat-completion request is made per item.

    Returns:
        The model's reply text for each prompt, in the same order.
    """
    responses: list[str] = []
    for prompt in prompts:
        resp = client.chat.completions.create(
            model="Qwen/Qwen3-30B-A3B-GPTQ-Int4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=256,
        )
        responses.append(resp.choices[0].message.content)
    return responses


if __name__ == "__main__":
    # Guarded so importing this module no longer fires network requests.
    results = batch_chat(prompts)
    for i, r in enumerate(results):
        print(f"\n=== Prompt {i+1} ===")
        print("Prompt:", prompts[i])
        print("Response:", r)