import json

import faiss
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel

# --- Model setup -------------------------------------------------------------
# DINOv3 ViT-B/16 is used purely as a feature extractor; similarity search
# runs against a prebuilt FAISS index of Xbox 360 gamerpic embeddings.
device = torch.device("cpu")
processor = AutoImageProcessor.from_pretrained(
    "facebook/dinov3-vitb16-pretrain-lvd1689m"
)
model = AutoModel.from_pretrained("facebook/dinov3-vitb16-pretrain-lvd1689m")
model.config.return_dict = False  # JIT tracing requires tuple outputs
model.to(device)
model.eval()  # inference only — disables dropout etc.

# Trace the model once with a dummy input so per-request inference is faster.
example_input = torch.rand(1, 3, 224, 224).to(device)  # (batch, C, H, W)
traced_model = torch.jit.trace(model, example_input)
traced_model = traced_model.to(device)

# Prebuilt FAISS index plus the row-number -> image-id map it was built with.
index = faiss.read_index("xbgp-faiss.index")
with open("xbgp-faiss-map.json", "r", encoding="utf-8") as f:
    images = json.load(f)


def process_image(image, top_k=50):
    """Embed *image* with DINOv3 and return the closest matching gamerpics.

    Args:
        image: PIL image uploaded through the Gradio UI.
        top_k: Number of nearest neighbours to request (default 50).

    Returns:
        A list of dicts, each with an ``"id"`` (looked up in the index map)
        and a human-readable ``"score"`` percentage string.
    """
    # The model expects 3-channel input.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Scale the short side to 224 px while preserving aspect ratio; the
    # processor performs its own final resizing/normalisation afterwards.
    width, height = image.size
    if width < height:
        scale = 224 / float(width)
        new_size = (224, int(float(height) * scale))
    else:
        scale = 224 / float(height)
        new_size = (int(float(width) * scale), 224)
    image = image.resize(new_size, Image.Resampling.LANCZOS)

    # Extract features without building an autograd graph.
    with torch.no_grad():
        inputs = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
        outputs = traced_model(inputs)

    # Mean-pool the token embeddings, then L2-normalise before searching.
    embeddings = outputs[0].mean(dim=1)
    vector = embeddings.detach().cpu().numpy().astype(np.float32)
    faiss.normalize_L2(vector)

    # Query the index for the top_k nearest neighbours.
    distances, indices = index.search(vector, top_k)

    matches = []
    for rank, matching_gamerpic in enumerate(indices[0]):
        # FAISS pads the result with -1 when the index holds fewer than
        # top_k vectors; indexing the map with -1 would silently return
        # the wrong (last) image, so skip those slots.
        if matching_gamerpic < 0:
            continue
        gamerpic = {}
        # int() converts the NumPy integer for safe list indexing/JSON use.
        gamerpic["id"] = images[int(matching_gamerpic)]
        # Map the L2 distance to a rough 0-100% similarity figure.
        gamerpic["score"] = str(round((1 / (distances[0][rank] + 1) * 100), 2)) + "%"
        matches.append(gamerpic)
    return matches


# Create a Gradio interface: upload an image, receive JSON of matches.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs="json",
    title="Xbox Gamerpic Finder - DINOv3",
    description="Upload an image to find similar Xbox 360 gamerpics using Meta's DINOv3 vision model",
).queue()

# Launch the Gradio app
iface.launch()