DouDou committed on
Commit
4416f19
·
verified ·
1 Parent(s): 89c39b8

Upload data3/vllm_qwen_batch.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. data3/vllm_qwen_batch.py +30 -0
data3/vllm_qwen_batch.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+
3
# Client for an OpenAI-compatible HTTP endpoint on localhost port 8000.
# The API key is the literal string "none".
client = OpenAI(
    api_key="none",
    base_url="http://localhost:8000/v1",
)

# Sample user prompts (in Chinese) that the script sends:
#   1. explain why large models need quantization
#   2. list three commonly used learning-rate scheduling methods
#   3. write a short sci-fi paragraph about a future city
prompts = [
    "解释一下为什么大模型需要量化。",
    "列出三种常用的学习率调度方法。",
    "写一个关于未来城市的科幻短段落。",
]
13
+
14
def batch_chat(
    prompts,
    *,
    model="Qwen/Qwen3-30B-A3B-GPTQ-Int4",
    max_tokens=256,
    api_client=None,
):
    """Send each prompt as a single-turn chat request and collect the replies.

    Requests are issued one after another, so the returned list is in the
    same order as ``prompts``. An empty ``prompts`` yields an empty list.

    Args:
        prompts: Iterable of strings; each becomes the content of one
            ``user`` message.
        model: Model name forwarded to ``chat.completions.create``.
        max_tokens: Completion token cap forwarded with every request.
        api_client: Object exposing ``chat.completions.create``. When
            ``None`` (the default) the module-level ``client`` is used.

    Returns:
        A list holding ``choices[0].message.content`` of each response,
        returned as-is.
    """
    # Resolve lazily so the module-level client is only required when the
    # caller did not supply one.
    active_client = client if api_client is None else api_client
    return [
        active_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        ).choices[0].message.content
        for prompt in prompts
    ]
24
+
25
# Send every sample prompt, then print each prompt next to its reply
# under a 1-based header.
results = batch_chat(prompts)

for i, (question, answer) in enumerate(zip(prompts, results)):
    print(f"\n=== Prompt {i+1} ===")
    print("Prompt:", question)
    print("Response:", answer)