Neon-tech committed on
Commit
da7701c
·
verified ·
1 Parent(s): ca14a5d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ import gradio as gr
3
+
4
# Hugging Face Hub repository id of the checkpoint this app serves.
model_name = "Qwen/Qwen3.5-35B-A3B"

# Loaded once at module import so every chat request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
)
8
+
9
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: Text of the latest user message.
        history: Earlier turns as passed in by ``gr.ChatInterface``. Two
            shapes are accepted: legacy ``(user, assistant)`` pairs, and
            "messages"-style dicts carrying ``role`` and ``content`` keys.
            ``None`` or an empty list means there is no prior context.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off before decoding).
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-style history: forward only the two keys the chat
            # template needs; any extra keys on the dict are dropped.
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue

        # Legacy pair-style history: one (user, assistant) pair per turn.
        # Either half may be None (e.g. a turn with no reply yet), so skip
        # missing halves instead of sending None content to the template.
        user, assistant = turn
        if user is not None:
            messages.append({"role": "user", "content": user})
        if assistant is not None:
            messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": message})

    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)

    # generate() returns prompt + completion; keep only the completion.
    prompt_length = inputs.input_ids.shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
21
+
22
# Wrap `chat` in Gradio's chat UI, then serve it on all interfaces, port 7860.
demo = gr.ChatInterface(chat)
demo.launch(server_name="0.0.0.0", server_port=7860)