File size: 2,750 Bytes
e2ec8a2
 
 
 
 
 
 
 
bbbfba8
 
e2ec8a2
 
 
 
 
 
 
 
bbbfba8
 
 
e2ec8a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbbfba8
e2ec8a2
bbbfba8
e2ec8a2
bbbfba8
e2ec8a2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""Export eligibility validator — decides what can be exported.

Consumes readiness assessments and document policy to produce
an ExportEligibility decision for the whole document.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from src.app.domain.models.readiness import ExportEligibility
from src.app.domain.models.status import ReadinessLevel
from src.app.policies.document_policy import DocumentPolicy
from src.app.validators.readiness_validator import (
    compute_page_alto_readiness,
    compute_page_pagexml_readiness,
)

if TYPE_CHECKING:
    from src.app.domain.models import CanonicalDocument


def compute_export_eligibility(
    doc: CanonicalDocument,
    policy: DocumentPolicy | None = None,
) -> ExportEligibility:
    """Decide which outputs a document is eligible for.

    Every page is scored for ALTO and PAGE XML readiness, the per-page
    levels are folded into one level per format, the policy is applied,
    and finally a viewer-render level is derived from the result.

    Args:
        doc: The canonical document whose pages are assessed.
        policy: Policy to apply; a default ``DocumentPolicy()`` is
            constructed when ``None`` is passed.

    Returns:
        An ExportEligibility carrying ``alto_export``, ``page_export``
        and ``viewer_render`` readiness levels.
    """
    active_policy = DocumentPolicy() if policy is None else policy

    # One (alto, pagexml) pair per page, evaluated in page order.
    per_page = [
        (
            compute_page_alto_readiness(pg).level,
            compute_page_pagexml_readiness(pg).level,
        )
        for pg in doc.pages
    ]
    alto_export = _aggregate_levels([alto for alto, _ in per_page])
    page_export = _aggregate_levels([pagexml for _, pagexml in per_page])

    if active_policy.strict_mode:
        # Strict mode refuses partial exports: PARTIAL collapses to NONE.
        alto_export = (
            ReadinessLevel.NONE
            if alto_export == ReadinessLevel.PARTIAL
            else alto_export
        )
        page_export = (
            ReadinessLevel.NONE
            if page_export == ReadinessLevel.PARTIAL
            else page_export
        )

    # The viewer is more forgiving than the exporters: any non-NONE export
    # level (evaluated after the strict-mode adjustment above) means it can
    # render fully; otherwise it falls back to degraded rendering as long
    # as at least one page has text regions.
    some_format_exportable = (
        alto_export != ReadinessLevel.NONE
        or page_export != ReadinessLevel.NONE
    )
    if some_format_exportable:
        viewer_render = ReadinessLevel.FULL
    elif any(len(pg.text_regions) > 0 for pg in doc.pages):
        viewer_render = ReadinessLevel.DEGRADED
    else:
        viewer_render = ReadinessLevel.NONE

    return ExportEligibility(
        alto_export=alto_export,
        page_export=page_export,
        viewer_render=viewer_render,
    )


def _aggregate_levels(levels: list[ReadinessLevel]) -> ReadinessLevel:
    """Collapse per-page readiness levels into one document-level value.

    Rules, checked in this order:
      * no pages at all            -> NONE
      * every page is FULL         -> FULL
      * every page is NONE         -> NONE
      * any page is FULL/PARTIAL   -> PARTIAL
      * anything else              -> DEGRADED

    Args:
        levels: Readiness level of each page.

    Returns:
        The aggregated readiness level for the whole document.
    """
    if not levels:
        return ReadinessLevel.NONE

    # Single pass collecting the three facts the rules above depend on.
    every_full = True
    every_none = True
    usable_seen = False
    for level in levels:
        is_full = level == ReadinessLevel.FULL
        every_full = every_full and is_full
        every_none = every_none and level == ReadinessLevel.NONE
        usable_seen = usable_seen or is_full or level == ReadinessLevel.PARTIAL

    if every_full:
        return ReadinessLevel.FULL
    if every_none:
        return ReadinessLevel.NONE
    return ReadinessLevel.PARTIAL if usable_seen else ReadinessLevel.DEGRADED