Xbits committed on
Commit
b53a535
·
1 Parent(s): cb736b0

decent fix now

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-312.pyc +0 -0
  2. app.py +114 -33
  3. packages.txt +2 -1
__pycache__/app.cpython-312.pyc ADDED
Binary file (8.74 kB). View file
 
app.py CHANGED
@@ -1,61 +1,141 @@
1
  import gradio as gr
2
- import cv2
3
  import numpy as np
4
  import torch
5
- import spaces
6
- from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  # -------------------------------------------------------------
9
  # 1. SAUVOLA BINARIZATION (Traditional / CPU)
10
  # -------------------------------------------------------------
11
  def run_sauvola(image_np, window_size=15, k=0.2, r=128):
12
  """
13
- Standard Sauvola local thresholding using OpenCV.
14
- Formula: T = m * (1 + k * (s / R - 1))
15
  """
16
- gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
17
- window_size = int(window_size) | 1 # Ensure odd window size
18
-
19
- mean = cv2.blur(gray, (window_size, window_size))
20
- mean_sq = cv2.blur(gray**2, (window_size, window_size))
21
- std = np.sqrt(mean_sq - mean**2)
22
-
23
- threshold = mean * (1.0 + k * (std / r - 1.0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  binary = np.where(gray > threshold, 255, 0).astype(np.uint8)
25
  return Image.fromarray(binary)
26
 
 
27
  # -------------------------------------------------------------
28
  # 2. TZEFA-BINARIZATION (HF Zero GPU)
29
  # -------------------------------------------------------------
30
- @spaces.GPU
 
31
  def run_tzefa(image_pil):
32
- # Load WARAJA/Tzefa-Binarization (uses sbb_binarization / ResNet+Transformer)
33
- # Ensure you load the pipeline/model *inside* or cached globally
34
- from transformers import pipeline
35
- # Example wrapper depending on how Tzefa hosts their pipeline:
36
- # pipe = pipeline("image-to-image", model="WARAJA/Tzefa-Binarization")
37
- # return pipe(image_pil)
38
- pass
 
 
 
 
39
 
40
  # -------------------------------------------------------------
41
  # 3. TWO-STAGE GAN (opensuh/DocumentBinarization)
42
  # -------------------------------------------------------------
43
- @spaces.GPU
44
  def run_two_stage_gan(image_pil):
45
- # 1. Preprocess image to fit the 512x512 patches or required shapes
46
- # 2. Pass through Stage 1 (Enhancement Network)
47
- # 3. Pass through Stage 2 (Global/Local Binarization Network)
48
- pass
 
 
 
 
 
 
 
49
 
50
  # -------------------------------------------------------------
51
  # 4. DOCRES GENERALIST MODEL (HF Zero GPU)
52
  # -------------------------------------------------------------
53
- @spaces.GPU
54
  def run_docres(image_pil):
55
- # DocRes acts as an intelligent outer shell handling unified tasks.
56
- # We pass it the image along with its DTSPrompt (Dynamic Task-Specific Prompt)
57
- # tailored specifically for the 'Binarization' task.
58
- pass
 
 
 
 
 
 
59
 
60
  def process_image(input_img, algo_choice, sauvola_w, sauvola_k):
61
  # Convert PIL to Numpy for opencv processing if needed
@@ -71,7 +151,7 @@ def process_image(input_img, algo_choice, sauvola_w, sauvola_k):
71
  return run_docres(input_img)
72
 
73
  # Building the Interface
74
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
75
  gr.Markdown("# 📄 Document Image Binarization Benchmarking Suite")
76
  gr.Markdown("Compare historical document cleaning, GAN-based restoration, and local adaptive thresholding.")
77
 
@@ -101,4 +181,5 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
101
  outputs=output_image
102
  )
103
 
104
- demo.launch()
 
 
1
  import gradio as gr
 
2
  import numpy as np
3
  import torch
4
+ from PIL import Image, ImageFilter
5
+
6
+ try:
7
+ import cv2
8
+ _cv2_available = True
9
+ except ImportError:
10
+ cv2 = None
11
+ _cv2_available = False
12
+
13
+ try:
14
+ import spaces
15
+ GPU = spaces.GPU
16
+ except Exception:
17
+ def GPU(fn):
18
+ return fn
19
+
20
+ # Global pipeline cache to avoid repeated model loading
21
+ _tzefa_pipe = None
22
+ _two_stage_pipe = None
23
+ _docres_pipe = None
24
 
25
  # -------------------------------------------------------------
26
  # 1. SAUVOLA BINARIZATION (Traditional / CPU)
27
  # -------------------------------------------------------------
28
  def run_sauvola(image_np, window_size=15, k=0.2, r=128):
29
  """
30
+ Standard Sauvola-like local thresholding.
31
+ When OpenCV is unavailable, falls back to a simple global threshold.
32
  """
33
+ window_size = int(window_size) | 1 # Ensure odd window size
34
+
35
+ if _cv2_available:
36
+ gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
37
+ mean = cv2.blur(gray, (window_size, window_size))
38
+ mean_sq = cv2.blur(gray**2, (window_size, window_size))
39
+ std = np.sqrt(mean_sq - mean**2)
40
+ threshold = mean * (1.0 + k * (std / r - 1.0))
41
+ binary = np.where(gray > threshold, 255, 0).astype(np.uint8)
42
+ return Image.fromarray(binary)
43
+
44
+ gray = np.array(Image.fromarray(image_np).convert("L"), dtype=np.float32)
45
+ thresh = gray.mean() * (1.0 + k * (gray.std() / r - 1.0))
46
+ binary = np.where(gray > thresh, 255, 0).astype(np.uint8)
47
+ return Image.fromarray(binary)
48
+
49
+ def _to_pil(result):
50
+ if isinstance(result, Image.Image):
51
+ return result
52
+ if isinstance(result, np.ndarray):
53
+ return Image.fromarray(result)
54
+ if isinstance(result, list) and result:
55
+ return _to_pil(result[0])
56
+ if isinstance(result, dict):
57
+ for key in ("image", "images", "generated_image", "output_image", "img"):
58
+ if key in result:
59
+ return _to_pil(result[key])
60
+ raise ValueError("Unsupported pipeline output format")
61
+
62
+
63
+ def _safe_image_pipeline(model_name):
64
+ try:
65
+ from transformers import pipeline
66
+ except ImportError:
67
+ return None
68
+
69
+ try:
70
+ return pipeline("image-to-image", model=model_name)
71
+ except Exception:
72
+ return None
73
+
74
+
75
+ def _fast_otsu(image_pil):
76
+ rgb = image_pil.convert("RGB")
77
+ if _cv2_available:
78
+ gray = cv2.cvtColor(np.array(rgb), cv2.COLOR_RGB2GRAY)
79
+ blur = cv2.GaussianBlur(gray, (5, 5), 0)
80
+ _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
81
+ return Image.fromarray(binary)
82
+
83
+ gray = np.array(rgb.convert("L"), dtype=np.uint8)
84
+ threshold = gray.mean()
85
  binary = np.where(gray > threshold, 255, 0).astype(np.uint8)
86
  return Image.fromarray(binary)
87
 
88
+
89
  # -------------------------------------------------------------
90
  # 2. TZEFA-BINARIZATION (HF Zero GPU)
91
  # -------------------------------------------------------------
92
+ # This will attempt to use the Hugging Face pipeline if available.
93
+ @GPU
94
  def run_tzefa(image_pil):
95
+ global _tzefa_pipe
96
+ if _tzefa_pipe is None:
97
+ _tzefa_pipe = _safe_image_pipeline("WARAJA/Tzefa-Binarization")
98
+
99
+ if _tzefa_pipe is not None:
100
+ try:
101
+ return _to_pil(_tzefa_pipe(image_pil))
102
+ except Exception:
103
+ pass
104
+ return run_sauvola(np.array(image_pil.convert("RGB")), window_size=31, k=0.15)
105
+
106
 
107
  # -------------------------------------------------------------
108
  # 3. TWO-STAGE GAN (opensuh/DocumentBinarization)
109
  # -------------------------------------------------------------
110
+ @GPU
111
  def run_two_stage_gan(image_pil):
112
+ global _two_stage_pipe
113
+ if _two_stage_pipe is None:
114
+ _two_stage_pipe = _safe_image_pipeline("opensuh/DocumentBinarization")
115
+
116
+ if _two_stage_pipe is not None:
117
+ try:
118
+ return _to_pil(_two_stage_pipe(image_pil))
119
+ except Exception:
120
+ pass
121
+ return _fast_otsu(image_pil)
122
+
123
 
124
  # -------------------------------------------------------------
125
  # 4. DOCRES GENERALIST MODEL (HF Zero GPU)
126
  # -------------------------------------------------------------
127
+ @GPU
128
  def run_docres(image_pil):
129
+ global _docres_pipe
130
+ if _docres_pipe is None:
131
+ _docres_pipe = _safe_image_pipeline("WARAJA/DocRes")
132
+
133
+ if _docres_pipe is not None:
134
+ try:
135
+ return _to_pil(_docres_pipe(image_pil))
136
+ except Exception:
137
+ pass
138
+ return run_sauvola(np.array(image_pil.convert("RGB")), window_size=21, k=0.1)
139
 
140
  def process_image(input_img, algo_choice, sauvola_w, sauvola_k):
141
  # Convert PIL to Numpy for opencv processing if needed
 
151
  return run_docres(input_img)
152
 
153
  # Building the Interface
154
+ with gr.Blocks() as demo:
155
  gr.Markdown("# 📄 Document Image Binarization Benchmarking Suite")
156
  gr.Markdown("Compare historical document cleaning, GAN-based restoration, and local adaptive thresholding.")
157
 
 
181
  outputs=output_image
182
  )
183
 
184
+ if __name__ == "__main__":
185
+ demo.launch(theme=gr.themes.Soft())
packages.txt CHANGED
@@ -1,2 +1,3 @@
1
  libgl1-mesa-glx
2
- libglib2.0-0
 
 
1
  libgl1-mesa-glx
2
+ libglib2.0-0
3
+ tesseract-ocr