import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import os

# Hub identifiers: the base checkpoint and the LoRA adapter applied on top.
BASE_MODEL = "Qwen/Qwen2.5-Math-7B-Instruct"
ADAPTER_REPO = "billwang37/mathbio-qwen-7b"
# Hub access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# The tokenizer is pulled from the adapter repo rather than the base repo.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, token=HF_TOKEN)

# Base weights are loaded in bfloat16 with automatic device placement.
print("Loading base model...")
_base_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **_base_load_options)

# Wrap the base model with the adapter and switch to evaluation mode.
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO, token=HF_TOKEN)
model.eval()
print("Model ready.")

# System message placed at the start of every conversation sent to the model.
SYSTEM_PROMPT = (
    "You are MathBioAgent, an expert AI assistant specialized in "
    "mathematical biology, epidemiology, operator learning, and "
    "partial differential equations."
)

def _extract_text(content):
    """Flatten one chat message's content into a plain string.

    Args:
        content: ``None``, a string, a list of parts (dicts carrying an
            optional ``"text"`` key, or arbitrary objects), or any other
            object.

    Returns:
        ``""`` for ``None``; the string itself for ``str``; the
        space-joined text of the parts for a list; ``str(content)``
        otherwise.
    """
    if content is None:
        # Without this guard, str(None) would put the literal text "None"
        # into the prompt.
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, dict):
                text = part.get("text", "")
                # A part may carry "text": None or a non-string value;
                # coerce so the join below cannot raise TypeError.
                pieces.append("" if text is None else str(text))
            else:
                pieces.append(str(part))
        return " ".join(pieces)
    return str(content)


def _build_messages(message, history):
    """Assemble the system prompt, prior turns and the new user message.

    Args:
        message: The new user message (string or structured content).
        history: Prior turns, either as dicts with ``"role"`` and
            ``"content"`` keys or as ``(user, assistant)`` pairs. May be
            ``None`` or empty.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts beginning with
        the system prompt and ending with the new user message.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content", "")
            # Skip entries that cannot be represented as a chat message
            # (no role, or explicitly empty content) instead of raising
            # KeyError or sending the text "None" to the model.
            if role is None or content is None:
                continue
            messages.append({"role": role, "content": _extract_text(content)})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_part, assistant_part = turn
            # Either side of a pair may be None (e.g. no reply recorded);
            # only the sides that are present are added.
            if user_part is not None:
                messages.append({"role": "user", "content": _extract_text(user_part)})
            if assistant_part is not None:
                messages.append(
                    {"role": "assistant", "content": _extract_text(assistant_part)}
                )
    messages.append({"role": "user", "content": _extract_text(message)})
    return messages


@spaces.GPU(duration=60)
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: The new user message (string or structured content).
        history: Prior turns in dict form (``role``/``content``) or as
            ``(user, assistant)`` pairs; ``None`` is treated as empty.

    Returns:
        The decoded text of the newly generated tokens, with special
        tokens removed.
    """
    messages = _build_messages(message, history)

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.3,
            do_sample=True,
            top_p=0.9,
        )

    # generate() returns prompt + completion; slice off the prompt tokens so
    # only the new reply is decoded.
    prompt_length = inputs.input_ids.shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Options for the chat UI, gathered in one place before the interface is built.
_chat_ui_options = {
    "fn": chat,
    "title": "WWang Lab — MathBio AI",
    "description": (
        "**Developed by Weinan Wang, University of Oklahoma.** "
        "A specialized mathematical biology LLM fine-tuned from Qwen2.5-Math-7B "
        "on 27,000 arxiv-derived examples covering epidemic modeling, PDEs, "
        "operator learning, and mathematical biology. Research preview."
    ),
    # Example prompts passed to the interface.
    "examples": [
        "What is R0 for an SIR model with beta=0.4 and gamma=0.1?",
        "Derive the stability condition for the SEIR endemic equilibrium.",
        "Explain the Keller-Segel chemotaxis model.",
    ],
}

demo = gr.ChatInterface(**_chat_ui_options)

# Start serving the interface.
demo.launch()