"""Minimal chat-inference demo: load a distilled Qwen checkpoint and sample one answer."""

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float32  # full precision; switch to bfloat16/float16 on GPU if memory-bound

model_name = "doubleblind/DeepSeek-R1-Distill-QweNSA-1.5B"

# NOTE(review): tokenizer is loaded from the base DeepSeek checkpoint rather than
# `model_name` — presumably the custom checkpoint ships no tokenizer files; confirm
# the vocabularies match.
tok = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    trust_remote_code=True,
)


def main() -> None:
    """Load the model, build a chat prompt, and print one sampled completion."""
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=DTYPE,
        # device_map="auto" shards/places weights automatically on GPU; on CPU,
        # None keeps the default single-device placement.
        device_map="auto" if DEVICE == "cuda" else None,
        trust_remote_code=True,
    )

    input_ids = tok.apply_chat_template(
        [
            {
                "role": "system",
                "content": r"You are a helpful assistant. Place your final answer in \boxed{}.",
            },
            {"role": "user", "content": "What is 1 + 1?"},
        ],
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Fix: pass an explicit attention_mask and pad_token_id so generate() does not
    # fall back to heuristics (and does not warn about undefined padding).
    out = model.generate(
        input_ids=input_ids,
        attention_mask=torch.ones_like(input_ids),
        max_new_tokens=128,
        do_sample=True,  # non-deterministic: no seed is set, matching original behavior
        temperature=0.6,
        top_p=0.95,
        pad_token_id=tok.eos_token_id,
    )
    print(tok.decode(out[0], skip_special_tokens=True))


if __name__ == "__main__":
    main()