"""Integration tests for the FastAPI routes. Tests the full API surface: providers, jobs, exports, viewer. """ from __future__ import annotations import io from typing import TYPE_CHECKING import pytest from fastapi.testclient import TestClient from src.app.main import app if TYPE_CHECKING: from pathlib import Path @pytest.fixture def client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> TestClient: """TestClient with lifespan — ensures DB/FileStore/JobService are initialized.""" monkeypatch.setenv("STORAGE_ROOT", str(tmp_path / "data")) # Force re-creation of settings from src.app import settings as settings_mod monkeypatch.setattr( settings_mod, "get_settings", lambda: settings_mod.Settings(storage_root=tmp_path / "data"), ) with TestClient(app) as c: yield c @pytest.fixture def paddle_payload_bytes(fixtures_dir: Path) -> bytes: with open(fixtures_dir / "paddle_ocr_sample.json", "rb") as f: return f.read() # -- Health ------------------------------------------------------------------ class TestHealth: def test_health(self, client: TestClient) -> None: r = client.get("/health") assert r.status_code == 200 assert r.json()["status"] == "ok" # -- Providers --------------------------------------------------------------- class TestProviders: def test_register_and_list(self, client: TestClient) -> None: r = client.post("/providers", json={ "provider_id": "test_paddle", "display_name": "PaddleOCR Test", "runtime_type": "local", "model_id_or_path": "/models/paddle", "family": "word_box_json", }) assert r.status_code == 201 data = r.json() assert data["provider_id"] == "test_paddle" # List r = client.get("/providers") assert r.status_code == 200 providers = r.json() assert any(p.get("provider_id") == "test_paddle" for p in providers) def test_get_provider(self, client: TestClient) -> None: client.post("/providers", json={ "provider_id": "get_test", "display_name": "Get Test", "runtime_type": "local", "model_id_or_path": "/models/test", "family": "word_box_json", }) r = client.get("/providers/get_test") assert r.status_code == 200 assert r.json()["provider_id"] == "get_test" def test_get_nonexistent_provider(self, client: TestClient) -> None: r = client.get("/providers/nonexistent") assert r.status_code == 404 def test_delete_provider(self, client: TestClient) -> None: client.post("/providers", json={ "provider_id": "del_test", "display_name": "Del Test", "runtime_type": "local", "model_id_or_path": "/models/test", "family": "word_box_json", }) r = client.delete("/providers/del_test") assert r.status_code == 204 r = client.get("/providers/del_test") assert r.status_code == 404 def test_delete_nonexistent_provider(self, client: TestClient) -> None: r = client.delete("/providers/nonexistent") assert r.status_code == 404 def test_register_invalid(self, client: TestClient) -> None: r = client.post("/providers", json={ "provider_id": "", # invalid: empty "display_name": "Bad", "runtime_type": "local", "model_id_or_path": "/x", "family": "word_box_json", }) assert r.status_code == 422 # -- Jobs -------------------------------------------------------------------- class TestJobs: def _create_job(self, client: TestClient, payload_bytes: bytes) -> dict: r = client.post( "/jobs", params={ "provider_id": "paddleocr", "provider_family": "word_box_json", "image_width": 2480, "image_height": 3508, }, files={ "raw_payload_file": ( "payload.json", io.BytesIO(payload_bytes), "application/json", ), }, ) assert r.status_code == 201 return r.json() def test_create_and_run_job(self, client: TestClient, paddle_payload_bytes: bytes) -> None: data = self._create_job(client, paddle_payload_bytes) assert data["status"] == "succeeded" assert data["has_alto"] is True assert data["has_page_xml"] is True assert data["error"] is None def test_list_jobs(self, client: TestClient, paddle_payload_bytes: bytes) -> None: self._create_job(client, paddle_payload_bytes) r = client.get("/jobs") assert r.status_code == 200 jobs = r.json() assert len(jobs) >= 1 def test_get_job(self, client: TestClient, paddle_payload_bytes: bytes) -> None: created = self._create_job(client, paddle_payload_bytes) job_id = created["job_id"] r = client.get(f"/jobs/{job_id}") assert r.status_code == 200 data = r.json() assert data["job_id"] == job_id assert data["status"] == "succeeded" def test_get_nonexistent_job(self, client: TestClient) -> None: r = client.get("/jobs/nonexistent") assert r.status_code == 404 def test_get_job_logs(self, client: TestClient, paddle_payload_bytes: bytes) -> None: created = self._create_job(client, paddle_payload_bytes) job_id = created["job_id"] r = client.get(f"/jobs/{job_id}/logs") assert r.status_code == 200 events = r.json() assert len(events) > 0 steps = [e["step"] for e in events] assert "normalize" in steps assert "export_alto" in steps def test_invalid_payload(self, client: TestClient) -> None: r = client.post( "/jobs", params={ "provider_id": "test", "provider_family": "word_box_json", "image_width": 100, "image_height": 100, }, files={"raw_payload_file": ("bad.json", io.BytesIO(b"not json"), "application/json")}, ) assert r.status_code == 422 # -- Exports ----------------------------------------------------------------- class TestExports: def _create_job(self, client: TestClient, payload_bytes: bytes) -> str: r = client.post( "/jobs", params={ "provider_id": "paddleocr", "provider_family": "word_box_json", "image_width": 2480, "image_height": 3508, }, files={"raw_payload_file": ("p.json", io.BytesIO(payload_bytes), "application/json")}, ) return r.json()["job_id"] def test_get_raw_payload(self, client: TestClient, paddle_payload_bytes: bytes) -> None: job_id = self._create_job(client, paddle_payload_bytes) r = client.get(f"/jobs/{job_id}/raw") assert r.status_code == 200 data = r.json() assert "provider_id" in data def test_get_canonical(self, client: TestClient, paddle_payload_bytes: bytes) -> None: job_id = self._create_job(client, paddle_payload_bytes) r = client.get(f"/jobs/{job_id}/canonical") assert r.status_code == 200 data = r.json() assert "document_id" in data assert "pages" in data def test_get_alto(self, client: TestClient, paddle_payload_bytes: bytes) -> None: job_id = self._create_job(client, paddle_payload_bytes) r = client.get(f"/jobs/{job_id}/alto") assert r.status_code == 200 assert r.headers["content-type"] == "application/xml" assert b" None: job_id = self._create_job(client, paddle_payload_bytes) r = client.get(f"/jobs/{job_id}/pagexml") assert r.status_code == 200 assert r.headers["content-type"] == "application/xml" assert b"PcGts" in r.content def test_nonexistent_export(self, client: TestClient) -> None: r = client.get("/jobs/nonexistent/alto") assert r.status_code == 404 def test_nonexistent_raw(self, client: TestClient) -> None: r = client.get("/jobs/nonexistent/raw") assert r.status_code == 404 # -- Viewer ------------------------------------------------------------------ class TestViewer: def test_viewer_fallback(self, client: TestClient, paddle_payload_bytes: bytes) -> None: r = client.post( "/jobs", params={ "provider_id": "paddleocr", "provider_family": "word_box_json", "image_width": 2480, "image_height": 3508, }, files={ "raw_payload_file": ( "p.json", io.BytesIO(paddle_payload_bytes), "application/json", ), }, ) job_id = r.json()["job_id"] r = client.get(f"/jobs/{job_id}/viewer") assert r.status_code == 200 data = r.json() assert "image_width" in data assert "image_height" in data def test_viewer_nonexistent(self, client: TestClient) -> None: r = client.get("/jobs/nonexistent/viewer") assert r.status_code == 404