ExtractDocs / app.py
RemiProAtos's picture
Replace pytesseract with PIL preprocessing & Mistral vision model
25af3cf verified
Raw
History Blame Contribute Delete
2.95 kB
import base64
import gradio as gr
import io
import os
from PIL import Image, ImageEnhance, ImageFilter
from mistralai.client import Mistral
from dotenv import load_dotenv
# Run python-dotenv first so that any value it adds to the environment is
# visible to the lookup on the next statement.
load_dotenv()
# Module-level Mistral client used by extract_info. The subscript raises
# KeyError at import time when MISTRAL_API_KEY is not set.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
def preprocess(image: Image.Image) -> Image.Image:
    """Return a cleaned-up copy of *image* intended to make text easier to read.

    Steps, in order:
      1. Images narrower than 1500 px are doubled in both dimensions using
         LANCZOS resampling.
      2. The image is converted to mode ``"L"`` (grayscale).
      3. The ``SHARPEN`` filter is applied.
      4. Contrast is enhanced with a factor of 2.0.
      5. Sharpness is enhanced with a factor of 2.0.

    Args:
        image: Source image.

    Returns:
        The processed image produced by the final sharpness enhancement.
    """
    width, height = image.size

    # Low-resolution input (e.g. a photo taken from a distance) is upscaled.
    if width < 1500:
        image = image.resize((width * 2, height * 2), Image.LANCZOS)

    # Colour carries no information for text, so drop it.
    grayscale = image.convert("L")

    # Sharpen before the contrast boost so fine strokes are preserved.
    edge_sharpened = grayscale.filter(ImageFilter.SHARPEN)

    # Make the text stand out from the background.
    high_contrast = ImageEnhance.Contrast(edge_sharpened).enhance(2.0)

    # Final pass to crisp up letter edges.
    return ImageEnhance.Sharpness(high_contrast).enhance(2.0)
def image_to_base64(image: Image.Image) -> str:
    """Serialise *image* as JPEG (quality 95) and return it as base64 text.

    Args:
        image: Image to encode; it is written with ``format="JPEG"``.

    Returns:
        The base64 encoding of the JPEG bytes, decoded to a ``str``.
    """
    with io.BytesIO() as jpeg_stream:
        image.save(jpeg_stream, format="JPEG", quality=95)
        jpeg_bytes = jpeg_stream.getvalue()
    encoded = base64.b64encode(jpeg_bytes)
    return encoded.decode("utf-8")
def extract_info(image: Image.Image) -> str:
    """Ask the vision model for the name/address fields shown in a document image.

    The image goes through ``preprocess``, is encoded by ``image_to_base64``
    and is sent as a ``data:image/jpeg;base64`` URL to the
    ``pixtral-12b-2409`` model. A French system prompt tells the model to
    answer with a plain JSON object using a fixed set of keys and ``n/a`` for
    anything it cannot find.

    Args:
        image: The document image. ``None`` (what the Gradio image input
            supplies when nothing was uploaded) is rejected with a
            user-facing error instead of failing inside ``preprocess``.

    Returns:
        The message content of the first choice in the model response,
        returned as-is (it is not parsed or validated as JSON here).

    Raises:
        gr.Error: If no image was supplied, or the response has no choices.
    """
    # Guard first: without it a missing upload surfaces as an opaque
    # AttributeError raised from preprocess().
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    b64 = image_to_base64(preprocess(image))

    system_prompt = (
        "Tu es un assistant d'extraction de données. "
        "Extrait les informations demandées et renvoie un objet JSON propre. "
        "N'inclue aucune explication ni mise en forme Markdown. "
        "Formate le résultat en JSON simple en utilisant les clés suivantes uniquement: "
        "Nom, Prénom, Numéro de voie, Type de voie, Nom de la voie, "
        "Complément du numéro de voie, Complément d'adresse, Code postal, Ville. "
        "N'incorpore aucune donnée supplémentaire. "
        "Si tu ne trouves pas la donnée, indique n/a en valeur."
    )

    response = client.chat.complete(
        model="pixtral-12b-2409",
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": f"data:image/jpeg;base64,{b64}",
                    },
                    {
                        "type": "text",
                        "text": "Extrait les informations structurées de ce document et renvoie uniquement le JSON.",
                    },
                ],
            },
        ],
    )

    # An empty or missing choices list would otherwise raise a bare
    # IndexError/TypeError on the subscript below.
    if not response.choices:
        raise gr.Error("The model returned no result for this image.")

    return response.choices[0].message.content
# Gradio UI: one image upload wired to extract_info, with the model's reply
# shown in a text box.
demo = gr.Interface(
    title="Doc Xtract",
    description="Upload an image to extract structured information as JSON.",
    fn=extract_info,
    inputs=gr.Image(label="Upload Image", type="pil"),
    outputs=gr.Textbox(label="Extracted JSON"),
)


if __name__ == "__main__":
    # Start the server only when run as a script; bind to 0.0.0.0.
    demo.launch(server_name="0.0.0.0")