File size: 11,246 Bytes
d520909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
"""
Image Cropping Utilities

Functions for extracting and managing region crops from document images.
"""

import hashlib
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
from PIL import Image

from ..chunks.models import BoundingBox, DocumentChunk

logger = logging.getLogger(__name__)


def crop_region(
    image: Union[np.ndarray, Image.Image],
    bbox: BoundingBox,
    padding_percent: float = 0.02,
) -> np.ndarray:
    """
    Crop a region from an image.

    Args:
        image: Source image (numpy array or PIL Image)
        bbox: Bounding box to crop (can be normalized or pixel)
        padding_percent: Padding added on every side, expressed as a
            fraction (0-1) of the box width / height

    Returns:
        Cropped image as numpy array (a copy, not a view of the source).
        When the clipped region is empty, a single zero-valued pixel is
        returned that has the same channel layout and dtype as the source.
    """
    # Convert to numpy if needed
    if isinstance(image, Image.Image):
        image = np.array(image)

    height, width = image.shape[:2]

    # Convert to pixel coordinates if normalized
    pixel_bbox = bbox.to_pixel(width, height) if bbox.normalized else bbox

    # Padding is proportional to the box size, not the page size
    pad_x = int(pixel_bbox.width * padding_percent)
    pad_y = int(pixel_bbox.height * padding_percent)

    # Grow the box by the padding, then clip it to the image bounds
    x_min = max(0, int(pixel_bbox.x_min) - pad_x)
    y_min = max(0, int(pixel_bbox.y_min) - pad_y)
    x_max = min(width, int(pixel_bbox.x_max) + pad_x)
    y_max = min(height, int(pixel_bbox.y_max) + pad_y)

    # Ensure valid crop region
    if x_max <= x_min or y_max <= y_min:
        logger.warning(
            "Invalid crop region: (%s, %s, %s, %s)", x_min, y_min, x_max, y_max
        )
        # Mirror the source's trailing (channel) dimensions and dtype so the
        # placeholder is interchangeable with a real crop of the same image.
        return np.zeros((1, 1) + image.shape[2:], dtype=image.dtype)

    return image[y_min:y_max, x_min:x_max].copy()


def crop_chunk(
    image: Union[np.ndarray, Image.Image],
    chunk: DocumentChunk,
    padding_percent: float = 0.02,
) -> np.ndarray:
    """
    Extract the image area covered by a document chunk.

    Convenience wrapper that forwards the chunk's bounding box to
    ``crop_region``.

    Args:
        image: Page image the chunk was detected on
        chunk: Document chunk; its ``bbox`` attribute selects the area
        padding_percent: Padding around the crop, passed through unchanged

    Returns:
        Cropped image
    """
    chunk_bbox = chunk.bbox
    return crop_region(
        image=image,
        bbox=chunk_bbox,
        padding_percent=padding_percent,
    )


def crop_multiple_regions(
    image: Union[np.ndarray, Image.Image],
    bboxes: List[BoundingBox],
    padding_percent: float = 0.02,
) -> List[np.ndarray]:
    """
    Extract several regions from the same image.

    Each bounding box is cropped independently via ``crop_region`` with the
    same padding; results are returned in the order of ``bboxes``.

    Args:
        image: Source image
        bboxes: Bounding boxes to crop
        padding_percent: Padding applied around every crop

    Returns:
        One cropped image per bounding box
    """
    crops: List[np.ndarray] = []
    for region in bboxes:
        crops.append(crop_region(image, region, padding_percent))
    return crops


class CropManager:
    """
    Manages crop extraction and storage.

    Crops are written to ``<output_dir>/<doc_id>/`` and the resulting file
    paths are remembered in an in-memory cache keyed by document ID, page
    and bounding box, so repeated requests for the same region skip the
    crop-and-save work.
    """

    # Modes carrying alpha or palette data; these are converted to RGB
    # before writing JPEG, which cannot store them.
    _JPEG_UNSUPPORTED_MODES = ("RGBA", "LA", "P")

    def __init__(
        self,
        output_dir: Union[str, Path],
        format: str = "png",
        quality: int = 95,
    ):
        """
        Args:
            output_dir: Root directory for saved crops (created if missing)
            format: Image file format / extension, case-insensitive
            quality: Quality setting, only used when saving as JPEG
        """
        self.output_dir = Path(output_dir)
        self.format = format.lower()
        self.quality = quality
        # Maps cache key -> saved crop path (as a string)
        self._cache: Dict[str, str] = {}

        # Ensure output directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _cache_key(doc_id: str, page: int, bbox: BoundingBox) -> str:
        """Build the in-memory cache key for a (document, page, bbox) triple."""
        return f"{doc_id}_{page}_{bbox.xyxy}"

    def get_crop_path(
        self,
        doc_id: str,
        page: int,
        bbox: BoundingBox,
    ) -> Path:
        """
        Generate a path for a crop.

        The filename embeds a short hash of the bbox coordinates (formatted
        to 4 decimals), so the same region always maps to the same file.
        """
        # MD5 is used purely to derive a stable short name, not for security
        bbox_str = f"{bbox.x_min:.4f}_{bbox.y_min:.4f}_{bbox.x_max:.4f}_{bbox.y_max:.4f}"
        bbox_hash = hashlib.md5(bbox_str.encode()).hexdigest()[:8]

        filename = f"{doc_id}_p{page}_{bbox_hash}.{self.format}"
        return self.output_dir / doc_id / filename

    def save_crop(
        self,
        image: Union[np.ndarray, Image.Image],
        doc_id: str,
        page: int,
        bbox: BoundingBox,
        padding_percent: float = 0.02,
    ) -> str:
        """
        Crop and save a region.

        Args:
            image: Source page image
            doc_id: Document ID
            page: Page number
            bbox: Region to crop
            padding_percent: Padding around crop

        Returns:
            Path to saved crop
        """
        # NOTE: padding_percent is not part of the cache key (nor of the
        # filename), so a later call with a different padding returns the
        # path of the crop that was saved first.
        cache_key = self._cache_key(doc_id, page, bbox)
        cached_path = self._cache.get(cache_key)
        if cached_path is not None:
            return cached_path

        # Crop region and convert to PIL for saving
        crop = crop_region(image, bbox, padding_percent)
        pil_crop = Image.fromarray(crop)

        # Ensure the per-document directory exists
        crop_path = self.get_crop_path(doc_id, page, bbox)
        crop_path.parent.mkdir(parents=True, exist_ok=True)

        # Save
        if self.format in ("jpg", "jpeg"):
            # Drop alpha / palette so the JPEG writer accepts the image
            if pil_crop.mode in self._JPEG_UNSUPPORTED_MODES:
                pil_crop = pil_crop.convert("RGB")
            pil_crop.save(crop_path, format="JPEG", quality=self.quality)
        else:
            pil_crop.save(crop_path, format=self.format.upper())

        # Cache
        path_str = str(crop_path)
        self._cache[cache_key] = path_str

        return path_str

    def save_chunk_crop(
        self,
        image: Union[np.ndarray, Image.Image],
        chunk: DocumentChunk,
        padding_percent: float = 0.02,
    ) -> str:
        """
        Save crop for a document chunk.

        Args:
            image: Page image
            chunk: Chunk to crop; its doc_id, page and bbox are used
            padding_percent: Padding around crop

        Returns:
            Path to saved crop
        """
        return self.save_crop(
            image=image,
            doc_id=chunk.doc_id,
            page=chunk.page,
            bbox=chunk.bbox,
            padding_percent=padding_percent,
        )

    def get_cached_crop(
        self,
        doc_id: str,
        page: int,
        bbox: BoundingBox,
    ) -> Optional[str]:
        """
        Get the cached path of a crop, or None if it is not in the cache.

        Only the in-memory cache is consulted; the file system is not checked.
        """
        return self._cache.get(self._cache_key(doc_id, page, bbox))

    def load_crop(self, path: Union[str, Path]) -> Optional[np.ndarray]:
        """
        Load a crop from disk.

        Returns:
            The image as a numpy array, or None when the file does not exist
            or cannot be read (a warning is logged in the latter case).
        """
        path = Path(path)
        if not path.exists():
            return None

        try:
            # The context manager closes the underlying file handle once the
            # pixel data has been copied into the array.
            with Image.open(path) as img:
                return np.array(img)
        except Exception as e:
            logger.warning("Failed to load crop %s: %s", path, e)
            return None

    def clear_cache(self) -> None:
        """Clear the path cache (saved files are left on disk)."""
        self._cache.clear()

    def cleanup_doc(self, doc_id: str) -> int:
        """
        Remove all crops for a document.

        Only files with the manager's current format extension are deleted;
        the document directory is removed afterwards if it ended up empty.
        Cache entries pointing into the document directory are dropped.

        Returns number of files removed.
        """
        doc_dir = self.output_dir / doc_id

        # Purge by stored path rather than by key prefix: a prefix test on
        # "<doc_id>_" would also evict entries of any other document whose
        # ID merely starts with that prefix (e.g. "doc" vs "doc_2").
        self._cache = {
            key: saved_path
            for key, saved_path in self._cache.items()
            if Path(saved_path).parent != doc_dir
        }

        if not doc_dir.exists():
            return 0

        count = 0
        for crop_file in doc_dir.glob(f"*.{self.format}"):
            try:
                crop_file.unlink()
            except OSError as e:
                # Best effort: keep going, but leave a trace of the failure
                logger.warning("Failed to remove crop %s: %s", crop_file, e)
            else:
                count += 1

        # Remove directory if empty
        try:
            doc_dir.rmdir()
        except OSError:
            # Directory still holds other files; leave it in place
            pass

        return count


def create_annotated_image(
    image: Union[np.ndarray, Image.Image],
    bboxes: List[BoundingBox],
    labels: Optional[List[str]] = None,
    colors: Optional[List[Tuple[int, int, int]]] = None,
    line_width: int = 2,
    font_size: int = 12,
) -> np.ndarray:
    """
    Create an annotated image with bounding boxes.

    The source image is not modified; drawing happens on a copy.

    Args:
        image: Source image
        bboxes: Bounding boxes to draw (normalized or pixel coordinates)
        labels: Optional labels for each box; boxes beyond the end of the
            list are drawn without a label
        colors: Optional colors for each box (RGB tuples); boxes beyond the
            end of the list fall back to a rotating default palette
        line_width: Line width for boxes
        font_size: Font size for labels

    Returns:
        Annotated image as numpy array
    """
    from PIL import ImageDraw, ImageFont

    # Convert to PIL
    if isinstance(image, np.ndarray):
        pil_image = Image.fromarray(image).copy()
    else:
        pil_image = image.copy()

    draw = ImageDraw.Draw(pil_image)
    width, height = pil_image.size

    # Default colors - rotating palette
    default_colors = [
        (255, 0, 0),    # Red
        (0, 255, 0),    # Green
        (0, 0, 255),    # Blue
        (255, 255, 0),  # Yellow
        (255, 0, 255),  # Magenta
        (0, 255, 255),  # Cyan
        (255, 128, 0),  # Orange
        (128, 0, 255),  # Purple
    ]

    # Try to load a scalable font; fall back to PIL's built-in default when
    # the font file is unavailable on this system.
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)
    except Exception:
        font = ImageFont.load_default()

    for i, bbox in enumerate(bboxes):
        # Get color
        if colors and i < len(colors):
            color = colors[i]
        else:
            color = default_colors[i % len(default_colors)]

        # Convert to pixels if normalized
        if bbox.normalized:
            x_min = int(bbox.x_min * width)
            y_min = int(bbox.y_min * height)
            x_max = int(bbox.x_max * width)
            y_max = int(bbox.y_max * height)
        else:
            x_min = int(bbox.x_min)
            y_min = int(bbox.y_min)
            x_max = int(bbox.x_max)
            y_max = int(bbox.y_max)

        # Draw rectangle
        draw.rectangle(
            [(x_min, y_min), (x_max, y_max)],
            outline=color,
            width=line_width,
        )

        # Draw label if provided
        if labels and i < len(labels):
            label = labels[i]

            # Prefer the strip just above the box. For boxes touching the top
            # edge that position is off-canvas, so place the label just
            # inside the box's top border instead to keep it visible.
            label_y = y_min - font_size - 4
            if label_y < 0:
                label_y = max(0, y_min) + line_width
            label_pos = (x_min, label_y)

            # Draw label background sized to the rendered text
            text_bbox = draw.textbbox(label_pos, label, font=font)
            draw.rectangle(text_bbox, fill=color)
            # Draw text
            draw.text(
                label_pos,
                label,
                fill=(255, 255, 255),
                font=font,
            )

    return np.array(pil_image)


def highlight_region(
    image: Union[np.ndarray, Image.Image],
    bbox: BoundingBox,
    highlight_color: Tuple[int, int, int] = (255, 255, 0),
    opacity: float = 0.3,
) -> np.ndarray:
    """
    Highlight a region in an image with semi-transparent overlay.

    The source image is not modified; a copy is returned. Single-channel
    (grayscale) input is promoted to three channels so the colored overlay
    can be applied; for images with an alpha channel only the first three
    channels are blended and alpha is left untouched.

    Args:
        image: Source image
        bbox: Region to highlight (normalized or pixel coordinates)
        highlight_color: Color for highlight (RGB)
        opacity: Opacity of highlight (0-1); values outside the range are
            clamped

    Returns:
        Image with highlighted region. If the region does not intersect the
        image, the (copied) image is returned without any highlight.
    """
    # Convert to numpy; np.array on a PIL image already yields a fresh array
    if isinstance(image, Image.Image):
        img_array = np.array(image)
    else:
        img_array = image.copy()

    # Promote grayscale to 3 channels so an RGB overlay can be blended in
    if img_array.ndim == 2:
        img_array = img_array[:, :, np.newaxis]
    if img_array.shape[2] == 1:
        img_array = np.repeat(img_array, 3, axis=2)

    height, width = img_array.shape[:2]

    # Convert to pixels if normalized
    if bbox.normalized:
        x_min = int(bbox.x_min * width)
        y_min = int(bbox.y_min * height)
        x_max = int(bbox.x_max * width)
        y_max = int(bbox.y_max * height)
    else:
        x_min = int(bbox.x_min)
        y_min = int(bbox.y_min)
        x_max = int(bbox.x_max)
        y_max = int(bbox.y_max)

    # Clip to valid range
    x_min = max(0, x_min)
    y_min = max(0, y_min)
    x_max = min(width, x_max)
    y_max = min(height, y_max)

    # Nothing to highlight when the box lies outside the image (or is
    # degenerate); without this guard the overlay would get negative sizes.
    if x_max <= x_min or y_max <= y_min:
        return img_array

    # Keep the blend inside the uint8 range
    opacity = min(1.0, max(0.0, opacity))

    # Blend the color into the RGB channels; the color broadcasts over the
    # region, so no full-size overlay array is needed.
    overlay_color = np.asarray(highlight_color, dtype=np.uint8)
    region = img_array[y_min:y_max, x_min:x_max, :3]
    blended = region * (1 - opacity) + overlay_color * opacity
    img_array[y_min:y_max, x_min:x_max, :3] = blended.astype(np.uint8)

    return img_array