DouDou committed on
Upload data3/vllm_qwen_batch.py with huggingface_hub
Browse files- data3/vllm_qwen_batch.py +30 -0
data3/vllm_qwen_batch.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
|
| 2 |
+
|
| 3 |
+
# OpenAI-compatible client pointed at a local vLLM server. The local server
# does not validate credentials, so the API key is just a placeholder.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")
|
| 7 |
+
|
| 8 |
+
# Demo prompts (Chinese): why LLMs need quantization, common LR schedules,
# and a short sci-fi paragraph about a future city.
prompts = [
    "解释一下为什么大模型需要量化。",
    "列出三种常用的学习率调度方法。",
    "写一个关于未来城市的科幻短段落。",
]
|
| 13 |
+
|
| 14 |
+
def batch_chat(prompts, model="Qwen/Qwen3-30B-A3B-GPTQ-Int4", max_tokens=256):
    """Send each prompt to the chat-completions endpoint and collect replies.

    Args:
        prompts: Iterable of user-message strings.
        model: Served model name. Defaults to the previously hard-coded
            value, so existing callers are unaffected.
        max_tokens: Per-request cap on completion tokens (default 256,
            matching the original behavior).

    Returns:
        List of response texts, one per prompt, in input order.
    """
    responses = []
    for p in prompts:
        # One synchronous request per prompt via the module-level `client`.
        # NOTE(review): no retry/error handling — a single failed request
        # aborts the whole batch.
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": p}],
            max_tokens=max_tokens,
        )
        responses.append(resp.choices[0].message.content)
    return responses
|
| 24 |
+
|
| 25 |
+
def main():
    """Run the demo batch and print each prompt alongside its response."""
    results = batch_chat(prompts)
    for i, r in enumerate(results):
        print(f"\n=== Prompt {i+1} ===")
        print("Prompt:", prompts[i])
        print("Response:", r)


# Guard the entry point so importing this module does not fire network
# requests as a side effect; output when run as a script is unchanged.
if __name__ == "__main__":
    main()
|