Spaces:

Xbits
/

xibi_binarization

Running on Zero

App Files Files Community

Xbits committed 3 days ago

Commit

b53a535

1 Parent(s): cb736b0

decent fix now

Browse files

Files changed (3) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +114 -33
packages.txt +2 -1

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (8.74 kB). View file

app.py CHANGED Viewed

@@ -1,61 +1,141 @@
 import gradio as gr
-import cv2
 import numpy as np
 import torch
-import spaces
-from PIL import Image
 # -------------------------------------------------------------
 # 1. SAUVOLA BINARIZATION (Traditional / CPU)
 # -------------------------------------------------------------
 def run_sauvola(image_np, window_size=15, k=0.2, r=128):
     """
-    Standard Sauvola local thresholding using OpenCV.
-    Formula: T = m * (1 + k * (s / R - 1))
     """
-    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
-    window_size = int(window_size) | 1 # Ensure odd window size
-    mean = cv2.blur(gray, (window_size, window_size))
-    mean_sq = cv2.blur(gray**2, (window_size, window_size))
-    std = np.sqrt(mean_sq - mean**2)
-    threshold = mean * (1.0 + k * (std / r - 1.0))
     binary = np.where(gray > threshold, 255, 0).astype(np.uint8)
     return Image.fromarray(binary)
 # -------------------------------------------------------------
 # 2. TZEFA-BINARIZATION (HF Zero GPU)
 # -------------------------------------------------------------
-@spaces.GPU
 def run_tzefa(image_pil):
-    # Load WARAJA/Tzefa-Binarization (uses sbb_binarization / ResNet+Transformer)
-    # Ensure you load the pipeline/model *inside* or cached globally
-    from transformers import pipeline
-    # Example wrapper depending on how Tzefa hosts their pipeline:
-    # pipe = pipeline("image-to-image", model="WARAJA/Tzefa-Binarization")
-    # return pipe(image_pil)
-    pass
 # -------------------------------------------------------------
 # 3. TWO-STAGE GAN (opensuh/DocumentBinarization)
 # -------------------------------------------------------------
-@spaces.GPU
 def run_two_stage_gan(image_pil):
-    # 1. Preprocess image to fit the 512x512 patches or required shapes
-    # 2. Pass through Stage 1 (Enhancement Network)
-    # 3. Pass through Stage 2 (Global/Local Binarization Network)
-    pass
 # -------------------------------------------------------------
 # 4. DOCRES GENERALIST MODEL (HF Zero GPU)
 # -------------------------------------------------------------
-@spaces.GPU
 def run_docres(image_pil):
-    # DocRes acts as an intelligent outer shell handling unified tasks.
-    # We pass it the image along with its DTSPrompt (Dynamic Task-Specific Prompt)
-    # tailored specifically for the 'Binarization' task.
-    pass
 def process_image(input_img, algo_choice, sauvola_w, sauvola_k):
     # Convert PIL to Numpy for opencv processing if needed
@@ -71,7 +151,7 @@ def process_image(input_img, algo_choice, sauvola_w, sauvola_k):
         return run_docres(input_img)
 # Building the Interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📄 Document Image Binarization Benchmarking Suite")
     gr.Markdown("Compare historical document cleaning, GAN-based restoration, and local adaptive thresholding.")
@@ -101,4 +181,5 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         outputs=output_image
     )
-demo.launch()

 import gradio as gr
 import numpy as np
 import torch
+from PIL import Image, ImageFilter
+try:
+    import cv2
+    _cv2_available = True
+except ImportError:
+    cv2 = None
+    _cv2_available = False
+try:
+    import spaces
+    GPU = spaces.GPU
+except Exception:
+    def GPU(fn):
+        return fn
+# Global pipeline cache to avoid repeated model loading
+_tzefa_pipe = None
+_two_stage_pipe = None
+_docres_pipe = None
 # -------------------------------------------------------------
 # 1. SAUVOLA BINARIZATION (Traditional / CPU)
 # -------------------------------------------------------------
 def run_sauvola(image_np, window_size=15, k=0.2, r=128):
     """
     Binarize an RGB image with Sauvola local thresholding.

     Each pixel is compared against T = m * (1 + k * (s / r - 1)), where m and
     s are the mean and standard deviation of the grayscale values inside a
     window_size x window_size neighbourhood. Pixels brighter than T become
     255, all others 0.

     Args:
         image_np: RGB image as a NumPy array (passed to cv2.cvtColor or
             PIL.Image.fromarray, depending on which backend is available).
         window_size: Side length of the local window; forced to an odd integer.
         k: Sensitivity factor applied to the normalised standard deviation.
         r: Normalisation constant for the standard deviation.

     Returns:
         A PIL image built from the uint8 binary mask (values 0 and 255).
     """
     window_size = int(window_size) | 1  # Ensure odd window size
     if _cv2_available:
         # Work in float64: squaring the uint8 image would wrap modulo 256
         # and corrupt the variance estimate.
         gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY).astype(np.float64)
         ksize = (window_size, window_size)
         mean = cv2.blur(gray, ksize)
         mean_sq = cv2.blur(gray * gray, ksize)
     else:
         # No OpenCV: compute the same local statistics with NumPy only.
         gray = np.asarray(Image.fromarray(image_np).convert("L"), dtype=np.float64)
         mean = _sauvola_box_mean(gray, window_size)
         mean_sq = _sauvola_box_mean(gray * gray, window_size)
     # Rounding can push the variance slightly below zero in flat regions;
     # clamp so the square root never yields NaN.
     variance = np.maximum(mean_sq - mean * mean, 0.0)
     std = np.sqrt(variance)
     threshold = mean * (1.0 + k * (std / r - 1.0))
     binary = np.where(gray > threshold, 255, 0).astype(np.uint8)
     return Image.fromarray(binary)


 def _sauvola_box_mean(values, window):
     """Return the window x window moving average of a 2-D float array.

     Uses a summed-area table over a reflect-padded copy (edge pixel not
     duplicated), which is intended to mirror OpenCV's default border mode.
     """
     rows, cols = values.shape
     half = window // 2
     padded = np.pad(values, half, mode="reflect")
     # Leading zero row/column lets every window sum be four table lookups.
     table = np.pad(padded, ((1, 0), (1, 0))).cumsum(axis=0).cumsum(axis=1)
     window_sum = (
         table[window:window + rows, window:window + cols]
         - table[:rows, window:window + cols]
         - table[window:window + rows, :cols]
         + table[:rows, :cols]
     )
     return window_sum / float(window * window)
def _to_pil(result):
    """Coerce a pipeline return value into a PIL image.

    Accepted shapes: a PIL image (returned unchanged), a NumPy array
    (converted with Image.fromarray), a non-empty list (its first element is
    unwrapped), or a dict holding one of the known image keys (the first key
    present, in the order listed below, is unwrapped).

    Raises:
        ValueError: If the value matches none of the shapes above.
    """
    known_keys = ("image", "images", "generated_image", "output_image", "img")

    if isinstance(result, Image.Image):
        return result

    if isinstance(result, np.ndarray):
        return Image.fromarray(result)

    if isinstance(result, list) and result:
        first_item = result[0]
        return _to_pil(first_item)

    if isinstance(result, dict):
        # Pick the first known key that the dict actually contains.
        found = next((name for name in known_keys if name in result), None)
        if found is not None:
            return _to_pil(result[found])

    raise ValueError("Unsupported pipeline output format")
+def _safe_image_pipeline(model_name):
+    try:
+        from transformers import pipeline
+    except ImportError:
+        return None
+    try:
+        return pipeline("image-to-image", model=model_name)
+    except Exception:
+        return None
def _fast_otsu(image_pil):
    """Binarize a PIL image with a global Otsu threshold.

    With OpenCV available the grayscale image is smoothed with a 5x5 Gaussian
    blur and thresholded via cv2.THRESH_OTSU. Without OpenCV the Otsu
    threshold is computed from the 256-bin grayscale histogram with NumPy
    (no blur), instead of degrading to a plain mean threshold.

    Args:
        image_pil: Input PIL image; converted to RGB first.

    Returns:
        A PIL image built from the uint8 binary mask (values 0 and 255).
    """
    rgb = image_pil.convert("RGB")
    if _cv2_available:
        gray = cv2.cvtColor(np.array(rgb), cv2.COLOR_RGB2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return Image.fromarray(binary)

    gray = np.array(rgb.convert("L"), dtype=np.uint8)
    hist = np.bincount(gray.ravel(), minlength=256).astype(np.float64)
    levels = np.arange(256, dtype=np.float64)
    # Cumulative pixel counts and intensity sums for the "<= t" class; the
    # "> t" class is whatever remains.
    weight_lo = np.cumsum(hist)
    weight_hi = weight_lo[-1] - weight_lo
    moment_lo = np.cumsum(hist * levels)
    moment_hi = moment_lo[-1] - moment_lo
    # Only thresholds that leave both classes non-empty are candidates.
    valid = (weight_lo > 0) & (weight_hi > 0)
    if valid.any():
        between = np.zeros(256, dtype=np.float64)
        # Between-class variance w0*w1*(mu0-mu1)^2, written without dividing
        # by the individual class weights.
        between[valid] = (
            moment_lo[valid] * weight_hi[valid] - moment_hi[valid] * weight_lo[valid]
        ) ** 2 / (weight_lo[valid] * weight_hi[valid])
        threshold = int(np.argmax(between))
    else:
        # Constant (or empty) image: nothing exceeds the threshold, matching
        # the previous mean-based result of an all-zero mask.
        threshold = 255
    binary = np.where(gray > threshold, 255, 0).astype(np.uint8)
    return Image.fromarray(binary)
 # -------------------------------------------------------------
 # 2. TZEFA-BINARIZATION (HF Zero GPU)
 # -------------------------------------------------------------
+# This will attempt to use the Hugging Face pipeline if available.
@GPU
def run_tzefa(image_pil):
    """Binarize image_pil with the "WARAJA/Tzefa-Binarization" pipeline.

    The pipeline is created on first use and kept in the module-level
    _tzefa_pipe. If it cannot be loaded, or inference raises, the failure is
    logged and the image is binarized with run_sauvola (window_size=31,
    k=0.15) instead.

    Args:
        image_pil: Input PIL image.

    Returns:
        A PIL image holding the binarized result.
    """
    global _tzefa_pipe
    import logging  # local import keeps this block self-contained

    if _tzefa_pipe is None:
        _tzefa_pipe = _safe_image_pipeline("WARAJA/Tzefa-Binarization")
    if _tzefa_pipe is not None:
        try:
            return _to_pil(_tzefa_pipe(image_pil))
        except Exception:
            # Best-effort: keep serving a result, but leave a trace of why
            # the model path was skipped.
            logging.getLogger(__name__).exception(
                "Tzefa pipeline inference failed; falling back to Sauvola"
            )
    return run_sauvola(np.array(image_pil.convert("RGB")), window_size=31, k=0.15)
 # -------------------------------------------------------------
 # 3. TWO-STAGE GAN (opensuh/DocumentBinarization)
 # -------------------------------------------------------------
@GPU
def run_two_stage_gan(image_pil):
    """Binarize image_pil with the "opensuh/DocumentBinarization" pipeline.

    The pipeline is created on first use and kept in the module-level
    _two_stage_pipe. If it cannot be loaded, or inference raises, the failure
    is logged and the image is binarized with _fast_otsu instead.

    Args:
        image_pil: Input PIL image.

    Returns:
        A PIL image holding the binarized result.
    """
    global _two_stage_pipe
    import logging  # local import keeps this block self-contained

    if _two_stage_pipe is None:
        _two_stage_pipe = _safe_image_pipeline("opensuh/DocumentBinarization")
    if _two_stage_pipe is not None:
        try:
            return _to_pil(_two_stage_pipe(image_pil))
        except Exception:
            # Best-effort: keep serving a result, but leave a trace of why
            # the model path was skipped.
            logging.getLogger(__name__).exception(
                "Two-stage GAN pipeline inference failed; falling back to Otsu"
            )
    return _fast_otsu(image_pil)
 # -------------------------------------------------------------
 # 4. DOCRES GENERALIST MODEL (HF Zero GPU)
 # -------------------------------------------------------------
@GPU
def run_docres(image_pil):
    """Binarize image_pil with the "WARAJA/DocRes" pipeline.

    The pipeline is created on first use and kept in the module-level
    _docres_pipe. If it cannot be loaded, or inference raises, the failure is
    logged and the image is binarized with run_sauvola (window_size=21,
    k=0.1) instead.

    Args:
        image_pil: Input PIL image.

    Returns:
        A PIL image holding the binarized result.
    """
    global _docres_pipe
    import logging  # local import keeps this block self-contained

    if _docres_pipe is None:
        _docres_pipe = _safe_image_pipeline("WARAJA/DocRes")
    if _docres_pipe is not None:
        try:
            return _to_pil(_docres_pipe(image_pil))
        except Exception:
            # Best-effort: keep serving a result, but leave a trace of why
            # the model path was skipped.
            logging.getLogger(__name__).exception(
                "DocRes pipeline inference failed; falling back to Sauvola"
            )
    return run_sauvola(np.array(image_pil.convert("RGB")), window_size=21, k=0.1)
 def process_image(input_img, algo_choice, sauvola_w, sauvola_k):
     # Convert PIL to Numpy for opencv processing if needed
         return run_docres(input_img)
 # Building the Interface
+with gr.Blocks() as demo:
     gr.Markdown("# 📄 Document Image Binarization Benchmarking Suite")
     gr.Markdown("Compare historical document cleaning, GAN-based restoration, and local adaptive thresholding.")
         outputs=output_image
     )
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server. The Soft theme is supplied
# to launch() here.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())

packages.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 libgl1-mesa-glx
-libglib2.0-0

 libgl1-mesa-glx
+libglib2.0-0
+tesseract-ocr