Hugging Face Spaces app — status: Running
import json

import faiss
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel

# ---------------------------------------------------------------------------
# Model initialization: DINOv3 ViT-B/16 used purely as a feature extractor.
# Everything runs on CPU in this Space.
# ---------------------------------------------------------------------------
device = torch.device("cpu")

processor = AutoImageProcessor.from_pretrained(
    "facebook/dinov3-vitb16-pretrain-lvd1689m"
)
model = AutoModel.from_pretrained("facebook/dinov3-vitb16-pretrain-lvd1689m")
model.config.return_dict = False  # tuple outputs are required for JIT tracing
model.to(device)
model.eval()  # inference mode: freeze dropout / norm statistics

# Trace the model once with a dummy batch so inference runs the optimized
# TorchScript graph instead of eager PyTorch.
example_input = torch.rand(1, 3, 224, 224).to(device)
traced_model = torch.jit.trace(model, example_input)
traced_model = traced_model.to(device)

# Pre-built FAISS index of gamerpic embeddings, plus the parallel id map
# (row i of the index corresponds to images[i]).
index = faiss.read_index("xbgp-faiss.index")
with open("xbgp-faiss-map.json", "r") as f:
    images = json.load(f)
def process_image(image):
    """
    Embed an uploaded image with DINOv3 and return the 50 closest gamerpics.

    Parameters
    ----------
    image : PIL.Image.Image
        Image uploaded through the Gradio UI (any mode; converted to RGB).

    Returns
    -------
    list[dict]
        One dict per match, with keys "id" (the entry from the FAISS id map)
        and "score" (similarity expressed as a percentage string).
    """
    # Convert to RGB if it isn't already (e.g. RGBA/paletted uploads).
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Pre-resize so the shorter side is 224px while keeping aspect ratio,
    # before handing off to the model's own processor.
    width, height = image.size
    if width < height:
        scale = 224 / float(width)
        new_size = (224, int(float(height) * scale))
    else:
        scale = 224 / float(height)
        new_size = (int(float(width) * scale), 224)
    image = image.resize(new_size, Image.Resampling.LANCZOS)

    # Extract features; the traced model returns a tuple whose first element
    # is the last hidden state (return_dict was disabled for tracing).
    with torch.no_grad():
        inputs = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
        outputs = traced_model(inputs)

    # Mean-pool the token embeddings and L2-normalize before searching so
    # inner-product / L2 distances behave like cosine similarity.
    embeddings = outputs[0].mean(dim=1)
    vector = embeddings.detach().cpu().numpy().astype(np.float32)
    faiss.normalize_L2(vector)

    # Search the top 50 nearest gamerpics.
    distances, indices = index.search(vector, 50)

    matches = []
    for dist, match_id in zip(distances[0], indices[0]):
        # FAISS pads results with -1 when the index holds fewer than k
        # vectors; skip those instead of wrapping around to images[-1].
        if match_id < 0:
            continue
        matches.append(
            {
                "id": images[int(match_id)],
                # Map distance to a rough percentage score: 0 distance -> 100%.
                "score": str(round((1 / (dist + 1) * 100), 2)) + "%",
            }
        )
    return matches
# Wire the similarity search into a minimal Gradio UI: image in, JSON out.
app_title = "Xbox Gamerpic Finder - DINOv3"
app_description = (
    "Upload an image to find similar Xbox 360 gamerpics using Meta's "
    "DINOv3 vision model"
)

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs="json",
    title=app_title,
    description=app_description,
).queue()

# Start the web app.
iface.launch()