Prepper_Model / app.py
Znilsson's picture
Update app.py
de8eaee verified
import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Hub repo IDs: the base checkpoint and the LoRA adapter applied on top of it.
BASE = "microsoft/phi-3-mini-4k-instruct"
ADAPTER = "Znilsson/survivalai-phi3-lora"  # point this at your own adapter repo if the ID differs
# Hub access token taken from the environment; None when HF_TOKEN is unset.
TOKEN = os.getenv("HF_TOKEN")

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE, trust_remote_code=True)

print("Loading base model (fp16)...")
# Options for the base checkpoint: half-precision weights, automatic device
# placement, and reduced host-memory usage while loading.
_base_load_options = {
    "dtype": torch.float16,
    "device_map": "auto",
    "trust_remote_code": True,
    "low_cpu_mem_usage": True,
}
model = AutoModelForCausalLM.from_pretrained(BASE, **_base_load_options)

print("Attaching + merging LoRA adapter...")
# Wrap the base model with the adapter, then rebind `model` to the result of
# merge_and_unload() so the rest of the file works with a single merged model.
model = PeftModel.from_pretrained(model, ADAPTER, token=TOKEN).merge_and_unload()
# Switch to evaluation mode; this script only runs inference.
model.eval()
# Roles from the chat history that are rendered into the prompt.
_PROMPT_ROLES = ("user", "assistant")


def _content_text(content):
    """Return the plain text carried by one history entry's content.

    Accepts a plain string, or a list/tuple of content blocks shaped like
    ``{"type": "text", "text": "..."}``. Anything else (``None``, file
    payloads, unknown block types) contributes no text.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        return "".join(
            block["text"]
            for block in content
            if isinstance(block, dict)
            and block.get("type") == "text"
            and isinstance(block.get("text"), str)
        )
    return ""


def _iter_turns(history):
    """Yield ``(role, text)`` for every past turn, oldest first.

    Two history layouts are understood:

    * pairs -- ``[(user_text, assistant_text), ...]``
    * messages -- ``[{"role": ..., "content": ...}, ...]``

    Entries in any other shape are ignored.
    """
    for turn in history or ():
        if isinstance(turn, dict):
            yield turn.get("role"), _content_text(turn.get("content"))
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_part, assistant_part = turn
            yield "user", _content_text(user_part)
            yield "assistant", _content_text(assistant_part)


def _build_prompt(message, history):
    """Render the conversation as a ``<|role|> ... <|end|>`` tagged prompt.

    Past turns with an unknown role or no text are skipped (for example a
    pending assistant slot that is still ``None``). The prompt always ends
    with the new user message followed by an open ``<|assistant|>`` tag so
    the model continues from there.
    """
    parts = [
        f"<|{role}|>\n{text}<|end|>\n"
        for role, text in _iter_turns(history)
        if role in _PROMPT_ROLES and text
    ]
    parts.append(f"<|user|>\n{message}<|end|>\n<|assistant|>\n")
    return "".join(parts)


def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior ``history``.

    Args:
        message: The new user message.
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts; ``None`` or empty means no history.

    Returns:
        The newly generated text, with special tokens removed and
        surrounding whitespace stripped.
    """
    # NOTE(review): the history is not truncated here; a long conversation may
    # exceed the model's context window -- confirm whether trimming is needed.
    prompt = _build_prompt(message, history)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=400,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    resp = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    return resp.strip()
# Gradio chat UI; every submitted message is answered by `chat`.
demo = gr.ChatInterface(
    chat,
    title="SurvivalAI",
    description="Fine-tuned Phi-3-mini on survival & emergency preparedness corpus.",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()