"""Gradio chat UI that streams code generation from a CPU-hosted causal LM."""

import os
from threading import Thread

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

hf_token = os.getenv("HF_TOKEN")
model_id = "ZyperAI/Z-AI-0.1-1.1B-Code.web"

# Professional loading logic: Since Gradio 6.x runs as a persistent server,
# global variables are naturally 'cached' for the duration of the process.
print("Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=hf_token,
    use_fast=False,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    device_map="cpu",
    token=hf_token,
)
print("Model loaded successfully.")


def generate_code(prompt, history):
    """Stream a model reply for *prompt*, yielding the updated conversation.

    Args:
        prompt: The user's new message text.
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts — the value format of ``gr.Chatbot(type="messages")``.
            May be ``None``/empty on the first turn.

    Yields:
        The full message list (history + user turn + partial assistant
        reply), which is what a messages-type Chatbot output expects.
        Yielding a bare string here would drop the user's turn and is not
        a valid value for ``type="messages"``.
    """
    messages = (history or []) + [{"role": "user", "content": prompt}]

    # Tokenize via the model's chat template; model is pinned to CPU above.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cpu")

    # Generation runs on a worker thread; the streamer hands decoded text
    # back to this generator as it is produced.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        inputs=inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        response += new_text
        # Emit the whole conversation so the Chatbot shows both the user's
        # message and the growing assistant reply.
        yield messages + [{"role": "assistant", "content": response}]

    # Don't leave the generation thread dangling after streaming finishes.
    thread.join()


# Gradio 6.x UI setup
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# ⚡ **Z-AI Web Coder**")

    # In Gradio 6, type="messages" is the standard for the chatbot component
    chatbot = gr.Chatbot(height=500, show_copy_button=True, type="messages")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="E.g., Create a responsive navigation bar with CSS...",
            show_label=False,
            scale=9,
        )
        submit = gr.Button("Build", variant="primary", scale=1)

    msg.submit(generate_code, [msg, chatbot], [chatbot])
    submit.click(generate_code, [msg, chatbot], [chatbot])

    # Clear the textbox after either trigger fires.
    msg.submit(lambda: "", None, [msg])
    submit.click(lambda: "", None, [msg])

if __name__ == "__main__":
    demo.launch()