Spaces:
Build error
Build error
Merge pull request #39 from maribakulj/claude/fix-manifest-analysis-performance-BIMKl
Browse files- backend/app/api/v1/models_api.py +4 -0
- backend/app/models/database.py +5 -2
- backend/app/models/model_config_db.py +2 -1
- backend/app/services/ai/analyzer.py +1 -0
- backend/app/services/ai/base.py +9 -1
- backend/app/services/ai/provider_google_ai.py +1 -1
- backend/app/services/ai/provider_mistral.py +22 -11
- backend/app/services/ai/provider_vertex_key.py +1 -1
- backend/app/services/ai/provider_vertex_sa.py +1 -1
- backend/app/services/ingest/iiif_fetcher.py +119 -15
- backend/app/services/job_runner.py +1 -1
- backend/tests/test_image_pipeline.py +22 -11
- backend/tests/test_provider_mistral.py +24 -19
- frontend/src/lib/api.ts +3 -0
- frontend/src/pages/Admin.tsx +1 -1
backend/app/api/v1/models_api.py
CHANGED
|
@@ -45,6 +45,7 @@ class ModelSelectRequest(BaseModel):
|
|
| 45 |
model_id: str = Field(..., min_length=1, max_length=256)
|
| 46 |
provider_type: str = Field(..., min_length=1, max_length=64)
|
| 47 |
display_name: str = Field("", max_length=256)
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
class ModelConfigResponse(BaseModel):
|
|
@@ -54,6 +55,7 @@ class ModelConfigResponse(BaseModel):
|
|
| 54 |
provider_type: str
|
| 55 |
selected_model_id: str
|
| 56 |
selected_model_display_name: str
|
|
|
|
| 57 |
updated_at: datetime
|
| 58 |
|
| 59 |
|
|
@@ -133,6 +135,7 @@ async def set_corpus_model(
|
|
| 133 |
provider_type=body.provider_type,
|
| 134 |
selected_model_id=body.model_id,
|
| 135 |
selected_model_display_name=display_name,
|
|
|
|
| 136 |
updated_at=datetime.now(timezone.utc),
|
| 137 |
)
|
| 138 |
db.add(config)
|
|
@@ -140,6 +143,7 @@ async def set_corpus_model(
|
|
| 140 |
config.provider_type = body.provider_type
|
| 141 |
config.selected_model_id = body.model_id
|
| 142 |
config.selected_model_display_name = display_name
|
|
|
|
| 143 |
config.updated_at = datetime.now(timezone.utc)
|
| 144 |
|
| 145 |
await db.commit()
|
|
|
|
| 45 |
model_id: str = Field(..., min_length=1, max_length=256)
|
| 46 |
provider_type: str = Field(..., min_length=1, max_length=64)
|
| 47 |
display_name: str = Field("", max_length=256)
|
| 48 |
+
supports_vision: bool = Field(True)
|
| 49 |
|
| 50 |
|
| 51 |
class ModelConfigResponse(BaseModel):
|
|
|
|
| 55 |
provider_type: str
|
| 56 |
selected_model_id: str
|
| 57 |
selected_model_display_name: str
|
| 58 |
+
supports_vision: bool
|
| 59 |
updated_at: datetime
|
| 60 |
|
| 61 |
|
|
|
|
| 135 |
provider_type=body.provider_type,
|
| 136 |
selected_model_id=body.model_id,
|
| 137 |
selected_model_display_name=display_name,
|
| 138 |
+
supports_vision=body.supports_vision,
|
| 139 |
updated_at=datetime.now(timezone.utc),
|
| 140 |
)
|
| 141 |
db.add(config)
|
|
|
|
| 143 |
config.provider_type = body.provider_type
|
| 144 |
config.selected_model_id = body.model_id
|
| 145 |
config.selected_model_display_name = display_name
|
| 146 |
+
config.supports_vision = body.supports_vision
|
| 147 |
config.updated_at = datetime.now(timezone.utc)
|
| 148 |
|
| 149 |
await db.commit()
|
backend/app/models/database.py
CHANGED
|
@@ -32,10 +32,13 @@ engine = create_async_engine(
|
|
| 32 |
|
| 33 |
# Activer les clés étrangères SQLite (désactivées par défaut).
|
| 34 |
# Nécessaire pour que ondelete="CASCADE" / "SET NULL" fonctionne.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
@event.listens_for(engine.sync_engine, "connect")
|
| 36 |
def _set_sqlite_pragma(dbapi_conn, _connection_record):
|
| 37 |
-
|
| 38 |
-
cursor.close()
|
| 39 |
|
| 40 |
async_session_factory = async_sessionmaker(
|
| 41 |
engine,
|
|
|
|
| 32 |
|
| 33 |
# Activer les clés étrangères SQLite (désactivées par défaut).
|
| 34 |
# Nécessaire pour que ondelete="CASCADE" / "SET NULL" fonctionne.
|
| 35 |
+
# Note : on n'appelle PAS cursor.close() car avec aiosqlite le curseur
|
| 36 |
+
# retourne une coroutine pour close(), ce qui provoque un RuntimeWarning
|
| 37 |
+
# « coroutine 'Cursor.close' was never awaited ». Le curseur PRAGMA est
|
| 38 |
+
# éphémère et libéré automatiquement.
|
| 39 |
@event.listens_for(engine.sync_engine, "connect")
|
| 40 |
def _set_sqlite_pragma(dbapi_conn, _connection_record):
|
| 41 |
+
dbapi_conn.execute("PRAGMA foreign_keys=ON")
|
|
|
|
| 42 |
|
| 43 |
async_session_factory = async_sessionmaker(
|
| 44 |
engine,
|
backend/app/models/model_config_db.py
CHANGED
|
@@ -8,7 +8,7 @@ La clé API n'est JAMAIS stockée ici (R06) — elle reste dans l'environnement.
|
|
| 8 |
from datetime import datetime
|
| 9 |
|
| 10 |
# 2. third-party
|
| 11 |
-
from sqlalchemy import DateTime, ForeignKey, String
|
| 12 |
from sqlalchemy.orm import Mapped, mapped_column
|
| 13 |
|
| 14 |
# 3. local
|
|
@@ -26,4 +26,5 @@ class ModelConfigDB(Base):
|
|
| 26 |
provider_type: Mapped[str] = mapped_column(String, nullable=False)
|
| 27 |
selected_model_id: Mapped[str] = mapped_column(String, nullable=False)
|
| 28 |
selected_model_display_name: Mapped[str] = mapped_column(String, nullable=False)
|
|
|
|
| 29 |
updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
|
|
|
|
| 8 |
from datetime import datetime
|
| 9 |
|
| 10 |
# 2. third-party
|
| 11 |
+
from sqlalchemy import Boolean, DateTime, ForeignKey, String
|
| 12 |
from sqlalchemy.orm import Mapped, mapped_column
|
| 13 |
|
| 14 |
# 3. local
|
|
|
|
| 26 |
provider_type: Mapped[str] = mapped_column(String, nullable=False)
|
| 27 |
selected_model_id: Mapped[str] = mapped_column(String, nullable=False)
|
| 28 |
selected_model_display_name: Mapped[str] = mapped_column(String, nullable=False)
|
| 29 |
+
supports_vision: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
|
| 30 |
updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
|
backend/app/services/ai/analyzer.py
CHANGED
|
@@ -121,6 +121,7 @@ def run_primary_analysis(
|
|
| 121 |
image_bytes=jpeg_bytes,
|
| 122 |
prompt=prompt_text,
|
| 123 |
model_id=model_config.selected_model_id,
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
# ── 4. Écriture ai_raw.json TOUJOURS EN PREMIER (R05) ─────────────────
|
|
|
|
| 121 |
image_bytes=jpeg_bytes,
|
| 122 |
prompt=prompt_text,
|
| 123 |
model_id=model_config.selected_model_id,
|
| 124 |
+
supports_vision=model_config.supports_vision,
|
| 125 |
)
|
| 126 |
|
| 127 |
# ── 4. Écriture ai_raw.json TOUJOURS EN PREMIER (R05) ─────────────────
|
backend/app/services/ai/base.py
CHANGED
|
@@ -42,13 +42,21 @@ class AIProvider(ABC):
|
|
| 42 |
...
|
| 43 |
|
| 44 |
@abstractmethod
|
| 45 |
-
def generate_content(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
"""Envoie une image + prompt à l'IA et retourne le texte brut de la réponse.
|
| 47 |
|
| 48 |
Args:
|
| 49 |
image_bytes: contenu JPEG de l'image dérivée.
|
| 50 |
prompt: texte du prompt rendu depuis le template.
|
| 51 |
model_id: identifiant technique du modèle à utiliser.
|
|
|
|
|
|
|
| 52 |
|
| 53 |
Returns:
|
| 54 |
Texte brut retourné par l'API (avant parsing).
|
|
|
|
| 42 |
...
|
| 43 |
|
| 44 |
@abstractmethod
|
| 45 |
+
def generate_content(
|
| 46 |
+
self,
|
| 47 |
+
image_bytes: bytes,
|
| 48 |
+
prompt: str,
|
| 49 |
+
model_id: str,
|
| 50 |
+
supports_vision: bool = True,
|
| 51 |
+
) -> str:
|
| 52 |
"""Envoie une image + prompt à l'IA et retourne le texte brut de la réponse.
|
| 53 |
|
| 54 |
Args:
|
| 55 |
image_bytes: contenu JPEG de l'image dérivée.
|
| 56 |
prompt: texte du prompt rendu depuis le template.
|
| 57 |
model_id: identifiant technique du modèle à utiliser.
|
| 58 |
+
supports_vision: True si le modèle accepte les images (déterminé
|
| 59 |
+
par l'API du provider lors du listing, stocké en BDD).
|
| 60 |
|
| 61 |
Returns:
|
| 62 |
Texte brut retourné par l'API (avant parsing).
|
backend/app/services/ai/provider_google_ai.py
CHANGED
|
@@ -55,7 +55,7 @@ class GoogleAIProvider(AIProvider):
|
|
| 55 |
)
|
| 56 |
return result
|
| 57 |
|
| 58 |
-
def generate_content(self, image_bytes: bytes, prompt: str, model_id: str) -> str:
|
| 59 |
if not self.is_configured():
|
| 60 |
raise RuntimeError(f"Variable d'environnement manquante : {_ENV_KEY}")
|
| 61 |
client = genai.Client(api_key=os.environ[_ENV_KEY])
|
|
|
|
| 55 |
)
|
| 56 |
return result
|
| 57 |
|
| 58 |
+
def generate_content(self, image_bytes: bytes, prompt: str, model_id: str, supports_vision: bool = True) -> str:
|
| 59 |
if not self.is_configured():
|
| 60 |
raise RuntimeError(f"Variable d'environnement manquante : {_ENV_KEY}")
|
| 61 |
client = genai.Client(api_key=os.environ[_ENV_KEY])
|
backend/app/services/ai/provider_mistral.py
CHANGED
|
@@ -72,18 +72,22 @@ def _is_ocr_model(model_id: str) -> bool:
|
|
| 72 |
return "ocr" in model_id.lower()
|
| 73 |
|
| 74 |
|
|
|
|
| 75 |
def _model_supports_vision(model_id: str, model_obj: object = None) -> bool:
|
| 76 |
"""Détecte si un modèle Mistral supporte les entrées image.
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
"""
|
| 81 |
if model_obj is not None:
|
| 82 |
caps = getattr(model_obj, "capabilities", None)
|
| 83 |
if caps is not None:
|
| 84 |
return bool(getattr(caps, "vision", False))
|
| 85 |
-
|
| 86 |
-
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
class MistralProvider(AIProvider):
|
|
@@ -180,18 +184,25 @@ class MistralProvider(AIProvider):
|
|
| 180 |
)
|
| 181 |
return list(_MISTRAL_FALLBACK_MODELS)
|
| 182 |
|
| 183 |
-
def generate_content(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
"""Envoie image + prompt à Mistral et retourne le texte brut.
|
| 185 |
|
| 186 |
Trois chemins selon le modèle :
|
| 187 |
1. OCR (mistral-ocr-latest) :
|
| 188 |
client.ocr.process() → markdown de toutes les pages concaténées.
|
| 189 |
-
|
| 190 |
-
2. Vision (Pixtral) :
|
| 191 |
client.chat.complete() avec content multimodal (image base64 + texte).
|
| 192 |
-
3. Texte seul (
|
| 193 |
client.chat.complete() avec prompt texte uniquement.
|
| 194 |
-
|
|
|
|
|
|
|
| 195 |
"""
|
| 196 |
if not self.is_configured():
|
| 197 |
raise RuntimeError(
|
|
@@ -222,8 +233,8 @@ class MistralProvider(AIProvider):
|
|
| 222 |
getattr(page, "markdown", "") for page in pages
|
| 223 |
)
|
| 224 |
|
| 225 |
-
# ── Chemin 2 : Vision multimodale
|
| 226 |
-
if
|
| 227 |
content: object = [
|
| 228 |
{"type": "image_url", "image_url": {"url": data_url}},
|
| 229 |
{"type": "text", "text": prompt},
|
|
|
|
| 72 |
return "ocr" in model_id.lower()
|
| 73 |
|
| 74 |
|
| 75 |
+
|
| 76 |
def _model_supports_vision(model_id: str, model_obj: object = None) -> bool:
|
| 77 |
"""Détecte si un modèle Mistral supporte les entrées image.
|
| 78 |
|
| 79 |
+
Source de vérité unique : capabilities.vision retourné par l'API Mistral.
|
| 80 |
+
Aucune liste hardcodée de noms de modèles — l'API fait autorité.
|
| 81 |
+
Si capabilities n'est pas disponible (SDK ancien), retourne False
|
| 82 |
+
par sécurité (le modèle sera utilisé en mode texte seul).
|
| 83 |
"""
|
| 84 |
if model_obj is not None:
|
| 85 |
caps = getattr(model_obj, "capabilities", None)
|
| 86 |
if caps is not None:
|
| 87 |
return bool(getattr(caps, "vision", False))
|
| 88 |
+
# Sans objet modèle (fallback statique), on ne peut pas deviner :
|
| 89 |
+
# retourner False pour éviter d'envoyer une image à un modèle texte seul.
|
| 90 |
+
return False
|
| 91 |
|
| 92 |
|
| 93 |
class MistralProvider(AIProvider):
|
|
|
|
| 184 |
)
|
| 185 |
return list(_MISTRAL_FALLBACK_MODELS)
|
| 186 |
|
| 187 |
+
def generate_content(
|
| 188 |
+
self,
|
| 189 |
+
image_bytes: bytes,
|
| 190 |
+
prompt: str,
|
| 191 |
+
model_id: str,
|
| 192 |
+
supports_vision: bool = True,
|
| 193 |
+
) -> str:
|
| 194 |
"""Envoie image + prompt à Mistral et retourne le texte brut.
|
| 195 |
|
| 196 |
Trois chemins selon le modèle :
|
| 197 |
1. OCR (mistral-ocr-latest) :
|
| 198 |
client.ocr.process() → markdown de toutes les pages concaténées.
|
| 199 |
+
2. Vision (supports_vision=True) :
|
|
|
|
| 200 |
client.chat.complete() avec content multimodal (image base64 + texte).
|
| 201 |
+
3. Texte seul (supports_vision=False) :
|
| 202 |
client.chat.complete() avec prompt texte uniquement.
|
| 203 |
+
|
| 204 |
+
Le flag supports_vision est déterminé dynamiquement par l'API Mistral
|
| 205 |
+
lors du listing des modèles (capabilities.vision), puis stocké en BDD.
|
| 206 |
"""
|
| 207 |
if not self.is_configured():
|
| 208 |
raise RuntimeError(
|
|
|
|
| 233 |
getattr(page, "markdown", "") for page in pages
|
| 234 |
)
|
| 235 |
|
| 236 |
+
# ── Chemin 2 : Vision multimodale ────────────────────────────────────
|
| 237 |
+
if supports_vision:
|
| 238 |
content: object = [
|
| 239 |
{"type": "image_url", "image_url": {"url": data_url}},
|
| 240 |
{"type": "text", "text": prompt},
|
backend/app/services/ai/provider_vertex_key.py
CHANGED
|
@@ -57,5 +57,5 @@ class VertexAPIKeyProvider(AIProvider):
|
|
| 57 |
def list_models(self) -> list[ModelInfo]:
|
| 58 |
raise RuntimeError(_UNAVAILABLE_MSG)
|
| 59 |
|
| 60 |
-
def generate_content(self, image_bytes: bytes, prompt: str, model_id: str) -> str:
|
| 61 |
raise RuntimeError(_UNAVAILABLE_MSG)
|
|
|
|
| 57 |
def list_models(self) -> list[ModelInfo]:
|
| 58 |
raise RuntimeError(_UNAVAILABLE_MSG)
|
| 59 |
|
| 60 |
+
def generate_content(self, image_bytes: bytes, prompt: str, model_id: str, supports_vision: bool = True) -> str:
|
| 61 |
raise RuntimeError(_UNAVAILABLE_MSG)
|
backend/app/services/ai/provider_vertex_sa.py
CHANGED
|
@@ -85,7 +85,7 @@ class VertexServiceAccountProvider(AIProvider):
|
|
| 85 |
)
|
| 86 |
return result
|
| 87 |
|
| 88 |
-
def generate_content(self, image_bytes: bytes, prompt: str, model_id: str) -> str:
|
| 89 |
if not self.is_configured():
|
| 90 |
raise RuntimeError(f"Variable d'environnement manquante : {_ENV_KEY}")
|
| 91 |
client = self._build_client()
|
|
|
|
| 85 |
)
|
| 86 |
return result
|
| 87 |
|
| 88 |
+
def generate_content(self, image_bytes: bytes, prompt: str, model_id: str, supports_vision: bool = True) -> str:
|
| 89 |
if not self.is_configured():
|
| 90 |
raise RuntimeError(f"Variable d'environnement manquante : {_ENV_KEY}")
|
| 91 |
client = self._build_client()
|
backend/app/services/ingest/iiif_fetcher.py
CHANGED
|
@@ -1,15 +1,21 @@
|
|
| 1 |
"""
|
| 2 |
Téléchargement d'images depuis des URLs IIIF via httpx.
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
# 1. stdlib
|
| 5 |
import logging
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# 2. third-party
|
| 8 |
import httpx
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
-
_DEFAULT_TIMEOUT =
|
| 13 |
|
| 14 |
_HEADERS = {
|
| 15 |
"User-Agent": (
|
|
@@ -19,10 +25,119 @@ _HEADERS = {
|
|
| 19 |
"Accept": "image/jpeg,image/png,image/*,*/*",
|
| 20 |
}
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def fetch_iiif_image(url: str, timeout: float = _DEFAULT_TIMEOUT) -> bytes:
|
| 24 |
"""Télécharge une image depuis une URL IIIF complète.
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
Args:
|
| 27 |
url: URL complète de l'image (ex. https://.../full/max/0/default.jpg).
|
| 28 |
timeout: délai maximal en secondes (défaut : 60 s).
|
|
@@ -35,14 +150,9 @@ def fetch_iiif_image(url: str, timeout: float = _DEFAULT_TIMEOUT) -> bytes:
|
|
| 35 |
httpx.TimeoutException: si la requête dépasse le délai.
|
| 36 |
httpx.RequestError: pour toute autre erreur réseau.
|
| 37 |
"""
|
|
|
|
| 38 |
logger.info("Fetching IIIF image", extra={"url": url})
|
| 39 |
-
response =
|
| 40 |
-
url,
|
| 41 |
-
headers=_HEADERS,
|
| 42 |
-
follow_redirects=True,
|
| 43 |
-
timeout=httpx.Timeout(timeout, connect=10.0),
|
| 44 |
-
)
|
| 45 |
-
response.raise_for_status()
|
| 46 |
logger.info(
|
| 47 |
"IIIF image fetched",
|
| 48 |
extra={"url": url, "size_bytes": len(response.content)},
|
|
@@ -71,13 +181,7 @@ def fetch_iiif_derivative(
|
|
| 71 |
# Pattern IIIF Image API : !w,h = "best fit" (le serveur choisit)
|
| 72 |
derivative_url = f"{service_url.rstrip('/')}/full/!{max_px},{max_px}/0/default.jpg"
|
| 73 |
logger.info("Fetching IIIF derivative", extra={"url": derivative_url, "max_px": max_px})
|
| 74 |
-
response =
|
| 75 |
-
derivative_url,
|
| 76 |
-
headers=_HEADERS,
|
| 77 |
-
follow_redirects=True,
|
| 78 |
-
timeout=httpx.Timeout(timeout, connect=10.0),
|
| 79 |
-
)
|
| 80 |
-
response.raise_for_status()
|
| 81 |
logger.info(
|
| 82 |
"IIIF derivative fetched",
|
| 83 |
extra={"url": derivative_url, "size_bytes": len(response.content)},
|
|
|
|
| 1 |
"""
|
| 2 |
Téléchargement d'images depuis des URLs IIIF via httpx.
|
| 3 |
+
|
| 4 |
+
Inclut un rate-limiter global et un retry avec backoff exponentiel
|
| 5 |
+
pour respecter les limites des serveurs IIIF patrimoniaux (Gallica, etc.).
|
| 6 |
"""
|
| 7 |
# 1. stdlib
|
| 8 |
import logging
|
| 9 |
+
import re
|
| 10 |
+
import threading
|
| 11 |
+
import time
|
| 12 |
|
| 13 |
# 2. third-party
|
| 14 |
import httpx
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
| 18 |
+
_DEFAULT_TIMEOUT = 60.0 # secondes (connect 15s + read 60s)
|
| 19 |
|
| 20 |
_HEADERS = {
|
| 21 |
"User-Agent": (
|
|
|
|
| 25 |
"Accept": "image/jpeg,image/png,image/*,*/*",
|
| 26 |
}
|
| 27 |
|
| 28 |
+
# ── Rate-limiter global ────────────────────────────────────────────────────
|
| 29 |
+
# Gallica and similar IIIF servers enforce strict rate limits.
|
| 30 |
+
# We enforce a minimum delay between consecutive requests.
|
| 31 |
+
_MIN_REQUEST_INTERVAL = 1.0 # secondes entre deux requêtes
|
| 32 |
+
_rate_lock = threading.Lock()
|
| 33 |
+
_last_request_time = 0.0
|
| 34 |
+
|
| 35 |
+
# ── Retry configuration ───────────────────────────────────────────────────
|
| 36 |
+
_MAX_RETRIES = 4
|
| 37 |
+
_INITIAL_BACKOFF = 2.0 # secondes, doublé à chaque retry
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _wait_rate_limit() -> None:
    """Block until at least ``_MIN_REQUEST_INTERVAL`` seconds separate two requests.

    The check, the optional sleep and the timestamp update all happen while
    ``_rate_lock`` is held, so concurrent callers are released one at a time.
    """
    global _last_request_time
    with _rate_lock:
        # Time still owed since the previous request was allowed through.
        remaining = _MIN_REQUEST_INTERVAL - (time.monotonic() - _last_request_time)
        if remaining > 0:
            time.sleep(remaining)
        _last_request_time = time.monotonic()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _fetch_with_retry(url: str, timeout: float) -> httpx.Response:
    """GET ``url``, retrying with exponential backoff on HTTP 429 / 5xx and timeouts.

    Every attempt first goes through ``_wait_rate_limit()``. The initial
    attempt is followed by at most ``_MAX_RETRIES`` retries. The delay before
    a retry starts at ``_INITIAL_BACKOFF`` seconds and doubles after each
    failed attempt. For a retryable HTTP status, a ``Retry-After`` header
    holding a finite, non-negative number of seconds replaces the backoff
    delay for that retry. Any other header value (HTTP-date form, negative
    number, NaN, infinity) is ignored in favour of the backoff, because
    ``time.sleep`` would reject it and abort the retry loop.

    Args:
        url: absolute URL to download.
        timeout: value handed to ``httpx.Timeout`` as the default timeout, in
            seconds; the connect timeout is fixed to 15 seconds.

    Returns:
        The response, once ``raise_for_status()`` has accepted it.

    Raises:
        httpx.HTTPStatusError: for a non-retried error status, or when the
            last attempt still answers 429 / 5xx.
        httpx.TimeoutException: when the last attempt times out.
    """
    backoff = _INITIAL_BACKOFF
    last_exc: Exception | None = None

    for attempt in range(_MAX_RETRIES + 1):
        _wait_rate_limit()
        try:
            response = httpx.get(
                url,
                headers=_HEADERS,
                follow_redirects=True,
                timeout=httpx.Timeout(timeout, connect=15.0),
            )
            if response.status_code == 429 or response.status_code >= 500:
                if attempt < _MAX_RETRIES:
                    wait_time = backoff
                    retry_after = response.headers.get("Retry-After")
                    if retry_after:
                        try:
                            requested = float(retry_after)
                        except ValueError:
                            # HTTP-date form or garbage: keep the backoff.
                            requested = None
                        # The chained comparison is False for NaN, negative
                        # values and infinity, all of which time.sleep()
                        # refuses with ValueError / OverflowError.
                        if requested is not None and 0 <= requested < float("inf"):
                            wait_time = requested

                    logger.warning(
                        "HTTP %d — retry %d/%d dans %.1fs",
                        response.status_code,
                        attempt + 1,
                        _MAX_RETRIES,
                        wait_time,
                        extra={"url": url},
                    )
                    time.sleep(wait_time)
                    backoff *= 2
                    continue
                # Last attempt: surface the error status to the caller.
                response.raise_for_status()

            response.raise_for_status()
            return response

        except httpx.TimeoutException as exc:
            last_exc = exc
            if attempt < _MAX_RETRIES:
                logger.warning(
                    "Timeout — retry %d/%d dans %.1fs",
                    attempt + 1,
                    _MAX_RETRIES,
                    backoff,
                    extra={"url": url},
                )
                time.sleep(backoff)
                backoff *= 2
                continue
            raise

    # Defensive fallback: every loop iteration returns, continues or raises.
    raise last_exc or RuntimeError(f"Échec après {_MAX_RETRIES} retries : {url}")
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _rewrite_full_to_reduced(url: str, max_px: int = 1500) -> str:
    """Turn a full-resolution IIIF request into a bounded ``!w,h`` request.

    Every ``/full/full/0/`` or ``/full/max/0/`` segment of ``url`` becomes
    ``/full/!{max_px},{max_px}/0/``, so the size reduction is requested from
    the server instead of downloading the full-resolution image.

    Args:
        url: image URL to inspect.
        max_px: value used for both the width and the height of the ``!w,h``
            size parameter.

    Returns:
        The rewritten URL, or ``url`` unchanged when it contains no such
        segment.
    """
    # Only the "/full/<full|max>/0/" part of the path is matched; whatever
    # follows (quality and format) is left untouched.
    size_template = r"\g<1>" + f"!{max_px},{max_px}" + r"\3"
    rewritten = re.sub(r"(/full/)(full|max)(/0/)", size_template, url)
    if rewritten == url:
        return url
    logger.info("URL IIIF réécrite: full → !%d,%d", max_px, max_px, extra={"original": url})
    return rewritten
|
| 132 |
+
|
| 133 |
|
| 134 |
def fetch_iiif_image(url: str, timeout: float = _DEFAULT_TIMEOUT) -> bytes:
|
| 135 |
"""Télécharge une image depuis une URL IIIF complète.
|
| 136 |
|
| 137 |
+
Si l'URL demande la pleine résolution (/full/full/ ou /full/max/),
|
| 138 |
+
elle est automatiquement réécrite pour demander un dérivé 1500px max
|
| 139 |
+
côté serveur, ce qui est plus rapide et évite le rate-limiting.
|
| 140 |
+
|
| 141 |
Args:
|
| 142 |
url: URL complète de l'image (ex. https://.../full/max/0/default.jpg).
|
| 143 |
timeout: délai maximal en secondes (défaut : 60 s).
|
|
|
|
| 150 |
httpx.TimeoutException: si la requête dépasse le délai.
|
| 151 |
httpx.RequestError: pour toute autre erreur réseau.
|
| 152 |
"""
|
| 153 |
+
url = _rewrite_full_to_reduced(url)
|
| 154 |
logger.info("Fetching IIIF image", extra={"url": url})
|
| 155 |
+
response = _fetch_with_retry(url, timeout)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
logger.info(
|
| 157 |
"IIIF image fetched",
|
| 158 |
extra={"url": url, "size_bytes": len(response.content)},
|
|
|
|
| 181 |
# Pattern IIIF Image API : !w,h = "best fit" (le serveur choisit)
|
| 182 |
derivative_url = f"{service_url.rstrip('/')}/full/!{max_px},{max_px}/0/default.jpg"
|
| 183 |
logger.info("Fetching IIIF derivative", extra={"url": derivative_url, "max_px": max_px})
|
| 184 |
+
response = _fetch_with_retry(derivative_url, timeout)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
logger.info(
|
| 186 |
"IIIF derivative fetched",
|
| 187 |
extra={"url": derivative_url, "size_bytes": len(response.content)},
|
backend/app/services/job_runner.py
CHANGED
|
@@ -126,7 +126,7 @@ async def _run_job_impl(job_id: str, db: AsyncSession) -> None:
|
|
| 126 |
selected_model_id=model_db.selected_model_id,
|
| 127 |
selected_model_display_name=model_db.selected_model_display_name,
|
| 128 |
provider=ProviderType(model_db.provider_type),
|
| 129 |
-
supports_vision=
|
| 130 |
last_fetched_at=model_db.updated_at,
|
| 131 |
available_models=[],
|
| 132 |
)
|
|
|
|
| 126 |
selected_model_id=model_db.selected_model_id,
|
| 127 |
selected_model_display_name=model_db.selected_model_display_name,
|
| 128 |
provider=ProviderType(model_db.provider_type),
|
| 129 |
+
supports_vision=model_db.supports_vision,
|
| 130 |
last_fetched_at=model_db.updated_at,
|
| 131 |
available_models=[],
|
| 132 |
)
|
backend/tests/test_image_pipeline.py
CHANGED
|
@@ -261,8 +261,11 @@ def test_fetch_iiif_image_success():
|
|
| 261 |
"""Retourne les bytes de l'image si la requête réussit."""
|
| 262 |
fake_bytes = _make_jpeg_bytes(100, 100)
|
| 263 |
|
| 264 |
-
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get
|
|
|
|
|
|
|
| 265 |
mock_response = MagicMock()
|
|
|
|
| 266 |
mock_response.content = fake_bytes
|
| 267 |
mock_response.raise_for_status.return_value = None
|
| 268 |
mock_get.return_value = mock_response
|
|
@@ -272,15 +275,18 @@ def test_fetch_iiif_image_success():
|
|
| 272 |
assert result == fake_bytes
|
| 273 |
_, kwargs = mock_get.call_args
|
| 274 |
assert kwargs["follow_redirects"] is True
|
| 275 |
-
# Timeout is
|
| 276 |
-
assert kwargs["timeout"].connect ==
|
| 277 |
-
assert kwargs["timeout"].read ==
|
| 278 |
|
| 279 |
|
| 280 |
def test_fetch_iiif_image_http_error():
|
| 281 |
-
"""Propage HTTPStatusError si le serveur répond 404."""
|
| 282 |
-
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get
|
|
|
|
|
|
|
| 283 |
mock_response = MagicMock()
|
|
|
|
| 284 |
mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
|
| 285 |
"404 Not Found",
|
| 286 |
request=MagicMock(),
|
|
@@ -293,8 +299,10 @@ def test_fetch_iiif_image_http_error():
|
|
| 293 |
|
| 294 |
|
| 295 |
def test_fetch_iiif_image_timeout():
|
| 296 |
-
"""Propage TimeoutException
|
| 297 |
-
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get
|
|
|
|
|
|
|
| 298 |
mock_get.side_effect = httpx.TimeoutException("timed out")
|
| 299 |
|
| 300 |
with pytest.raises(httpx.TimeoutException):
|
|
@@ -305,8 +313,11 @@ def test_fetch_iiif_image_custom_timeout():
|
|
| 305 |
"""Le timeout personnalisé est bien transmis à httpx.get."""
|
| 306 |
fake_bytes = _make_jpeg_bytes(50, 50)
|
| 307 |
|
| 308 |
-
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get
|
|
|
|
|
|
|
| 309 |
mock_response = MagicMock()
|
|
|
|
| 310 |
mock_response.content = fake_bytes
|
| 311 |
mock_response.raise_for_status.return_value = None
|
| 312 |
mock_get.return_value = mock_response
|
|
@@ -314,9 +325,9 @@ def test_fetch_iiif_image_custom_timeout():
|
|
| 314 |
fetch_iiif_image("https://example.com/img.jpg", timeout=120.0)
|
| 315 |
|
| 316 |
_, kwargs = mock_get.call_args
|
| 317 |
-
# Custom timeout wraps in httpx.Timeout(120.0, connect=
|
| 318 |
assert kwargs["timeout"].read == 120.0
|
| 319 |
-
assert kwargs["timeout"].connect ==
|
| 320 |
|
| 321 |
|
| 322 |
# ---------------------------------------------------------------------------
|
|
|
|
| 261 |
"""Retourne les bytes de l'image si la requête réussit."""
|
| 262 |
fake_bytes = _make_jpeg_bytes(100, 100)
|
| 263 |
|
| 264 |
+
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get, \
|
| 265 |
+
patch("app.services.ingest.iiif_fetcher.time.sleep"), \
|
| 266 |
+
patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
|
| 267 |
mock_response = MagicMock()
|
| 268 |
+
mock_response.status_code = 200
|
| 269 |
mock_response.content = fake_bytes
|
| 270 |
mock_response.raise_for_status.return_value = None
|
| 271 |
mock_get.return_value = mock_response
|
|
|
|
| 275 |
assert result == fake_bytes
|
| 276 |
_, kwargs = mock_get.call_args
|
| 277 |
assert kwargs["follow_redirects"] is True
|
| 278 |
+
# Timeout is an httpx.Timeout object (connect=15s, read=60s)
|
| 279 |
+
assert kwargs["timeout"].connect == 15.0
|
| 280 |
+
assert kwargs["timeout"].read == 60.0
|
| 281 |
|
| 282 |
|
| 283 |
def test_fetch_iiif_image_http_error():
|
| 284 |
+
"""Propage HTTPStatusError si le serveur répond 404 (pas de retry sur 4xx hors 429)."""
|
| 285 |
+
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get, \
|
| 286 |
+
patch("app.services.ingest.iiif_fetcher.time.sleep"), \
|
| 287 |
+
patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
|
| 288 |
mock_response = MagicMock()
|
| 289 |
+
mock_response.status_code = 404
|
| 290 |
mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
|
| 291 |
"404 Not Found",
|
| 292 |
request=MagicMock(),
|
|
|
|
| 299 |
|
| 300 |
|
| 301 |
def test_fetch_iiif_image_timeout():
|
| 302 |
+
"""Propage TimeoutException après épuisement des retries."""
|
| 303 |
+
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get, \
|
| 304 |
+
patch("app.services.ingest.iiif_fetcher.time.sleep"), \
|
| 305 |
+
patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
|
| 306 |
mock_get.side_effect = httpx.TimeoutException("timed out")
|
| 307 |
|
| 308 |
with pytest.raises(httpx.TimeoutException):
|
|
|
|
| 313 |
"""Le timeout personnalisé est bien transmis à httpx.get."""
|
| 314 |
fake_bytes = _make_jpeg_bytes(50, 50)
|
| 315 |
|
| 316 |
+
with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get, \
|
| 317 |
+
patch("app.services.ingest.iiif_fetcher.time.sleep"), \
|
| 318 |
+
patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
|
| 319 |
mock_response = MagicMock()
|
| 320 |
+
mock_response.status_code = 200
|
| 321 |
mock_response.content = fake_bytes
|
| 322 |
mock_response.raise_for_status.return_value = None
|
| 323 |
mock_get.return_value = mock_response
|
|
|
|
| 325 |
fetch_iiif_image("https://example.com/img.jpg", timeout=120.0)
|
| 326 |
|
| 327 |
_, kwargs = mock_get.call_args
|
| 328 |
+
# Custom timeout wraps in httpx.Timeout(120.0, connect=15.0)
|
| 329 |
assert kwargs["timeout"].read == 120.0
|
| 330 |
+
assert kwargs["timeout"].connect == 15.0
|
| 331 |
|
| 332 |
|
| 333 |
# ---------------------------------------------------------------------------
|
backend/tests/test_provider_mistral.py
CHANGED
|
@@ -88,33 +88,36 @@ def _make_fake_mistralai(models: list[_FakeModel] | None = None) -> _types.Modul
|
|
| 88 |
|
| 89 |
|
| 90 |
# ---------------------------------------------------------------------------
|
| 91 |
-
# _model_supports_vision() —
|
| 92 |
# ---------------------------------------------------------------------------
|
| 93 |
|
| 94 |
-
def
|
| 95 |
-
|
| 96 |
-
assert _model_supports_vision("pixtral-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
def test_vision_detection_text_models_by_name():
|
| 100 |
-
assert _model_supports_vision("mistral-large-latest") is False
|
| 101 |
assert _model_supports_vision("mistral-small-latest") is False
|
| 102 |
assert _model_supports_vision("codestral-latest") is False
|
| 103 |
|
| 104 |
|
| 105 |
-
def
|
|
|
|
| 106 |
m_vision = _FakeModel("some-model", vision=True)
|
| 107 |
m_text = _FakeModel("some-model", vision=False)
|
| 108 |
assert _model_supports_vision("some-model", m_vision) is True
|
| 109 |
assert _model_supports_vision("some-model", m_text) is False
|
| 110 |
|
| 111 |
|
| 112 |
-
def
|
| 113 |
-
"""capabilities.vision=False
|
| 114 |
m = _FakeModel("pixtral-test", vision=False)
|
| 115 |
assert _model_supports_vision("pixtral-test", m) is False
|
| 116 |
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# ---------------------------------------------------------------------------
|
| 119 |
# is_configured()
|
| 120 |
# ---------------------------------------------------------------------------
|
|
@@ -263,25 +266,27 @@ def test_list_models_fallback_backward_compat():
|
|
| 263 |
# ---------------------------------------------------------------------------
|
| 264 |
|
| 265 |
def test_generate_content_vision_model_returns_text(monkeypatch):
|
| 266 |
-
"""Modèle vision
|
| 267 |
monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
|
| 268 |
fake = _make_fake_mistralai()
|
| 269 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 270 |
|
| 271 |
result = MistralProvider().generate_content(
|
| 272 |
-
b"fake-jpeg", "Analyse ce folio.", "pixtral-large-latest"
|
|
|
|
| 273 |
)
|
| 274 |
assert result == "Voici le JSON de la page."
|
| 275 |
|
| 276 |
|
| 277 |
def test_generate_content_text_model_returns_text(monkeypatch):
|
| 278 |
-
"""Modèle texte (
|
| 279 |
monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
|
| 280 |
fake = _make_fake_mistralai()
|
| 281 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 282 |
|
| 283 |
result = MistralProvider().generate_content(
|
| 284 |
-
b"fake-jpeg", "Analyse ce folio.", "mistral-large-latest"
|
|
|
|
| 285 |
)
|
| 286 |
assert result == "Voici le JSON de la page."
|
| 287 |
|
|
@@ -305,7 +310,7 @@ def test_generate_content_vision_sends_image_url(monkeypatch):
|
|
| 305 |
fake.Mistral = _FakeMistral
|
| 306 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 307 |
|
| 308 |
-
MistralProvider().generate_content(b"jpeg", "prompt", "pixtral-large-latest")
|
| 309 |
|
| 310 |
assert len(captured) == 1
|
| 311 |
content = captured[0]["content"]
|
|
@@ -316,7 +321,7 @@ def test_generate_content_vision_sends_image_url(monkeypatch):
|
|
| 316 |
|
| 317 |
|
| 318 |
def test_generate_content_text_sends_string_content(monkeypatch):
|
| 319 |
-
"""Modèle texte : le message content est une chaîne (pas d'image)."""
|
| 320 |
monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
|
| 321 |
captured: list[dict] = []
|
| 322 |
|
|
@@ -334,7 +339,7 @@ def test_generate_content_text_sends_string_content(monkeypatch):
|
|
| 334 |
fake.Mistral = _FakeMistral
|
| 335 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 336 |
|
| 337 |
-
MistralProvider().generate_content(b"jpeg", "mon prompt", "mistral-large-latest")
|
| 338 |
|
| 339 |
assert len(captured) == 1
|
| 340 |
assert captured[0]["content"] == "mon prompt"
|
|
@@ -479,7 +484,7 @@ def test_generate_content_ocr_model_not_called_for_vision(monkeypatch):
|
|
| 479 |
fake.Mistral = _FakeMistral
|
| 480 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 481 |
|
| 482 |
-
MistralProvider().generate_content(b"jpeg", "prompt", "pixtral-large-latest")
|
| 483 |
assert len(ocr_called) == 0
|
| 484 |
|
| 485 |
|
|
|
|
| 88 |
|
| 89 |
|
| 90 |
# ---------------------------------------------------------------------------
|
| 91 |
+
# _model_supports_vision() — détection dynamique via l'API
|
| 92 |
# ---------------------------------------------------------------------------
|
| 93 |
|
| 94 |
+
def test_vision_detection_without_model_obj_returns_false():
    """Without a model object (hence no capabilities), detection returns False to be safe."""
    # Called with the name only: every id must come back as non-vision.
    model_ids = (
        "pixtral-large-latest",
        "mistral-small-latest",
        "codestral-latest",
    )
    for model_id in model_ids:
        assert _model_supports_vision(model_id) is False
|
| 99 |
|
| 100 |
|
| 101 |
+
def test_vision_detection_uses_capabilities_from_api():
    """The ``capabilities.vision`` flag returned by the Mistral API is the source of truth."""
    model_id = "some-model"
    # Same model id for both objects: only the capability flag differs.
    with_vision = _FakeModel(model_id, vision=True)
    without_vision = _FakeModel(model_id, vision=False)

    outcomes = [
        _model_supports_vision(model_id, with_vision),
        _model_supports_vision(model_id, without_vision),
    ]
    assert outcomes[0] is True
    assert outcomes[1] is False
|
| 107 |
|
| 108 |
|
| 109 |
+
def test_vision_detection_capabilities_false_on_any_model():
    """capabilities.vision=False means no vision, whatever the model name."""
    model_id = "pixtral-test"
    text_only_model = _FakeModel(model_id, vision=False)
    detected = _model_supports_vision(model_id, text_only_model)
    assert detected is False
|
| 113 |
|
| 114 |
|
| 115 |
+
def test_vision_detection_capabilities_true_on_any_model():
    """capabilities.vision=True means vision is enabled, whatever the model name."""
    model_id = "mistral-small-latest"
    vision_model = _FakeModel(model_id, vision=True)
    detected = _model_supports_vision(model_id, vision_model)
    assert detected is True
|
| 119 |
+
|
| 120 |
+
|
| 121 |
# ---------------------------------------------------------------------------
|
| 122 |
# is_configured()
|
| 123 |
# ---------------------------------------------------------------------------
|
|
|
|
| 266 |
# ---------------------------------------------------------------------------
|
| 267 |
|
| 268 |
def test_generate_content_vision_model_returns_text(monkeypatch):
    """Vision model (supports_vision=True): generate_content returns the reply text."""
    # Provide an API key and swap the real SDK for the stub module.
    monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
    stub_module = _make_fake_mistralai()
    monkeypatch.setitem(sys.modules, "mistralai", stub_module)

    provider = MistralProvider()
    reply = provider.generate_content(
        b"fake-jpeg",
        "Analyse ce folio.",
        "pixtral-large-latest",
        supports_vision=True,
    )

    assert reply == "Voici le JSON de la page."
|
| 279 |
|
| 280 |
|
| 281 |
def test_generate_content_text_model_returns_text(monkeypatch):
    """Text model (supports_vision=False): generate_content still returns the reply text."""
    # Provide an API key and swap the real SDK for the stub module.
    monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
    stub_module = _make_fake_mistralai()
    monkeypatch.setitem(sys.modules, "mistralai", stub_module)

    provider = MistralProvider()
    reply = provider.generate_content(
        b"fake-jpeg",
        "Analyse ce folio.",
        "mistral-large-latest",
        supports_vision=False,
    )

    assert reply == "Voici le JSON de la page."
|
| 292 |
|
|
|
|
| 310 |
fake.Mistral = _FakeMistral
|
| 311 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 312 |
|
| 313 |
+
MistralProvider().generate_content(b"jpeg", "prompt", "pixtral-large-latest", supports_vision=True)
|
| 314 |
|
| 315 |
assert len(captured) == 1
|
| 316 |
content = captured[0]["content"]
|
|
|
|
| 321 |
|
| 322 |
|
| 323 |
def test_generate_content_text_sends_string_content(monkeypatch):
|
| 324 |
+
"""Modèle texte (supports_vision=False) : le message content est une chaîne (pas d'image)."""
|
| 325 |
monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
|
| 326 |
captured: list[dict] = []
|
| 327 |
|
|
|
|
| 339 |
fake.Mistral = _FakeMistral
|
| 340 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 341 |
|
| 342 |
+
MistralProvider().generate_content(b"jpeg", "mon prompt", "mistral-large-latest", supports_vision=False)
|
| 343 |
|
| 344 |
assert len(captured) == 1
|
| 345 |
assert captured[0]["content"] == "mon prompt"
|
|
|
|
| 484 |
fake.Mistral = _FakeMistral
|
| 485 |
monkeypatch.setitem(sys.modules, "mistralai", fake)
|
| 486 |
|
| 487 |
+
MistralProvider().generate_content(b"jpeg", "prompt", "pixtral-large-latest", supports_vision=True)
|
| 488 |
assert len(ocr_called) == 0
|
| 489 |
|
| 490 |
|
frontend/src/lib/api.ts
CHANGED
|
@@ -280,11 +280,13 @@ export const selectModel = (
|
|
| 280 |
modelId: string,
|
| 281 |
displayName: string,
|
| 282 |
providerType: string,
|
|
|
|
| 283 |
): Promise<CorpusModelConfig> =>
|
| 284 |
put(`/api/v1/corpora/${corpusId}/model`, {
|
| 285 |
model_id: modelId,
|
| 286 |
display_name: displayName,
|
| 287 |
provider_type: providerType,
|
|
|
|
| 288 |
})
|
| 289 |
|
| 290 |
export const deleteCorpus = (id: string): Promise<void> =>
|
|
@@ -295,6 +297,7 @@ export interface CorpusModelConfig {
|
|
| 295 |
selected_model_id: string
|
| 296 |
selected_model_display_name: string
|
| 297 |
provider_type: string
|
|
|
|
| 298 |
updated_at: string
|
| 299 |
}
|
| 300 |
|
|
|
|
| 280 |
modelId: string,
|
| 281 |
displayName: string,
|
| 282 |
providerType: string,
|
| 283 |
+
supportsVision: boolean = true,
|
| 284 |
): Promise<CorpusModelConfig> =>
|
| 285 |
put(`/api/v1/corpora/${corpusId}/model`, {
|
| 286 |
model_id: modelId,
|
| 287 |
display_name: displayName,
|
| 288 |
provider_type: providerType,
|
| 289 |
+
supports_vision: supportsVision,
|
| 290 |
})
|
| 291 |
|
| 292 |
export const deleteCorpus = (id: string): Promise<void> =>
|
|
|
|
| 297 |
selected_model_id: string
|
| 298 |
selected_model_display_name: string
|
| 299 |
provider_type: string
|
| 300 |
+
supports_vision: boolean
|
| 301 |
updated_at: string
|
| 302 |
}
|
| 303 |
|
frontend/src/pages/Admin.tsx
CHANGED
|
@@ -190,7 +190,7 @@ function ModelPanel({ corpusId, onSaved }: ModelPanelProps) {
|
|
| 190 |
setSavingModel(true)
|
| 191 |
const model = models.find((m) => m.model_id === selectedModelId)
|
| 192 |
try {
|
| 193 |
-
await selectModel(corpusId, selectedModelId, model?.display_name ?? selectedModelId, selectedProvider)
|
| 194 |
const updated = await getCorpusModel(corpusId)
|
| 195 |
setCurrentModel(updated)
|
| 196 |
setSaveSuccess(`Modele "${model?.display_name ?? selectedModelId}" associe.`)
|
|
|
|
| 190 |
setSavingModel(true)
|
| 191 |
const model = models.find((m) => m.model_id === selectedModelId)
|
| 192 |
try {
|
| 193 |
+
await selectModel(corpusId, selectedModelId, model?.display_name ?? selectedModelId, selectedProvider, model?.supports_vision ?? true)
|
| 194 |
const updated = await getCorpusModel(corpusId)
|
| 195 |
setCurrentModel(updated)
|
| 196 |
setSaveSuccess(`Modele "${model?.display_name ?? selectedModelId}" associe.`)
|