ChatSpartacus / app.py
OzTianlu's picture
Update app.py
d8d0150 verified
import spaces
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
# Hub repository that provides both the tokenizer and the model weights.
MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"

# Both loaders take the repository name from MODEL_ID so they cannot drift
# apart. trust_remote_code=True is passed to each loader, as the original did.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    tie_word_embeddings=False,  # attempt to force-disable the weight-tying check
)
def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two history layouts are accepted:

    * a list of dicts carrying ``"role"`` and ``"content"`` keys, which are
      copied through unchanged;
    * a list of ``(user, assistant)`` pairs, where either side may be
      ``None`` (that side is skipped).

    An empty or ``None`` history yields an empty list.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            messages.append({"role": entry["role"], "content": entry["content"]})
            continue
        user_text, assistant_text = entry
        if user_text is not None:
            messages.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


@spaces.GPU
def respond(message, history):
    """Stream the model's reply to ``message`` as a progressively longer string.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation, either as role/content
            dicts or as ``(user, assistant)`` pairs.

    Yields:
        The reply accumulated so far, once per chunk of decoded text produced
        by the streamer.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been drained.
    """
    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # return_dict=True makes the return value a mapping regardless of the
    # library's default, so the token ids can be picked out explicitly.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    input_ids = encoded["input_ids"].to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
    )

    errors = []

    def _run_generation():
        # Thread boundary: capture any failure so it can be re-raised in the
        # caller, and close the stream so the consuming loop does not block
        # forever waiting for a stop signal that would never arrive.
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:
            errors.append(exc)
            streamer.end()

    # Generation runs in a background thread; this generator consumes the
    # streamer concurrently and forwards partial text to the caller.
    thread = Thread(target=_run_generation)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response

    thread.join()
    if errors:
        raise errors[0]
# Chat UI: every submitted message is routed to `respond`, whose yielded
# strings are shown as the reply while it is being generated.
demo = gr.ChatInterface(
    respond,
    title="Spartacus Chat",
    description="Chat with NoesisLab/Spartacus-1B-Instruct",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()