"""Zero-shot text classification demo (DeBERTa-v3-large NLI head) served via Gradio.

The thread-count environment variables must be set BEFORE torch/transformers
are imported, which is why they precede the heavy imports below.
"""

import os

# Cap CPU threading for every numeric backend; ignored if set after import.
CPU_THREADS = 16
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
os.environ["OPENBLAS_NUM_THREADS"] = str(CPU_THREADS)
os.environ["NUMEXPR_NUM_THREADS"] = str(CPU_THREADS)

import torch
import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

MODEL_ID = "MoritzLaurer/deberta-v3-large-zeroshot-v2.0"

torch.set_num_threads(CPU_THREADS)
torch.set_num_interop_threads(1)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()

# Map NLI label names (lower-cased) to logit indices; fall back to index 2,
# the conventional "entailment" slot of a 3-way NLI head.
label2id = {k.lower(): v for k, v in model.config.label2id.items()}
entail_id = label2id.get("entailment", 2)


def _softmax(x, axis=-1):
    """Numerically stable softmax along ``axis`` (default: last).

    ``axis`` is a backward-compatible addition; the old single-vector
    call ``_softmax(v)`` behaves exactly as before.
    """
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)


def run_zero_shot(text, labels, hypothesis_template, multi_label, top_k):
    """Classify ``text`` against comma-separated candidate ``labels``.

    Each label is substituted into ``hypothesis_template`` to form an NLI
    hypothesis, and the pair (text, hypothesis) is scored by the model's
    entailment probability. With ``multi_label`` the raw per-label
    entailment probabilities are reported; otherwise they are re-normalized
    with a softmax across labels. Returns a JSON-serializable dict, or an
    ``{"error": ...}`` dict on invalid input.
    """
    text = (text or "").strip()
    labels = (labels or "").strip()
    hypothesis_template = (hypothesis_template or "").strip() or "This text is about {}"
    # Fix: without a "{}" placeholder, str.format is a no-op and every label
    # would silently receive an identical hypothesis (hence identical score).
    if "{}" not in hypothesis_template:
        hypothesis_template += " {}"

    if not text:
        return {"error": "Enter some text."}
    candidate_labels = [x.strip() for x in labels.split(",") if x.strip()]
    if not candidate_labels:
        return {"error": "Enter at least 1 label (comma-separated)."}

    # Fix: batch all (premise, hypothesis) pairs into ONE forward pass
    # (padding=True) instead of one model call per label.
    hypotheses = [hypothesis_template.format(lab) for lab in candidate_labels]
    with torch.inference_mode():
        inputs = tokenizer(
            [text] * len(candidate_labels),
            hypotheses,
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        logits = model(**inputs).logits.float().cpu().numpy()

    # Per-pair entailment probability from each row's NLI distribution.
    scores_np = _softmax(logits, axis=-1)[:, entail_id].astype(np.float32)

    if bool(multi_label):
        # Multi-label: labels are scored independently.
        out_scores = scores_np
    else:
        # Single-label: renormalize entailment scores across labels
        # (matches the original behavior: softmax over probabilities).
        out_scores = _softmax(scores_np)

    pairs = sorted(
        zip(candidate_labels, out_scores.tolist()),
        key=lambda kv: kv[1],
        reverse=True,
    )[: max(1, int(top_k))]

    return {
        "cpu_threads": CPU_THREADS,
        "top": {"label": pairs[0][0], "confidence_pct": round(pairs[0][1] * 100, 2)},
        "all": [{"label": k, "confidence_pct": round(v * 100, 2)} for k, v in pairs],
    }


demo = gr.Interface(
    fn=run_zero_shot,
    inputs=[
        gr.Textbox(label="Text", lines=4, value="I am wahhhh"),
        gr.Textbox(label="Candidate Labels (comma-separated)", value="sad, happy, angry, neutral"),
        gr.Textbox(label="Hypothesis Template", value="This text is about {}"),
        gr.Checkbox(label="Multi-label", value=False),
        gr.Slider(label="Top-K to show", minimum=1, maximum=25, value=5, step=1),
    ],
    outputs=gr.JSON(label="Output"),
    title="Zero-Shot Classification (DeBERTa v3 Large, 16-core CPU)",
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()