Spaces:

Netrava
/

omniparser-api

Runtime error

App Files Files Community

Netrava committed on Aug 3, 2025

Commit

0b851ec

verified ·

1 Parent(s): 86062d2

Upload 4 files

Browse files

Files changed (4) hide show

README-simplified.md +62 -0
app.py +209 -72
app_simplified.py +285 -0
requirements.txt +3 -2

README-simplified.md ADDED Viewed

	@@ -0,0 +1,62 @@

+---
+title: OmniParser v2.0 API (Simplified)
+emoji: 🖼️
+colorFrom: blue
+colorTo: indigo
+sdk: gradio
+sdk_version: 4.0.0
+app_file: app_simplified.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# OmniParser v2.0 API (Simplified Version)
+This is a simplified version of the OmniParser v2.0 API that simulates the functionality without using the actual models. It's provided as a fallback in case the full version has compatibility issues.
+## Features
+- Simulates parsing UI screenshots into structured JSON data
+- Identifies interactive elements (buttons, menus, icons, etc.)
+- Provides captions describing the functionality of each element
+- Returns visualization of detected elements
+- Accessible via a simple REST API
+## API Usage
+You can use this API by sending a POST request with a file upload:
+```python
+import requests
+# Replace with your actual API URL after deployment
+OMNIPARSER_API_URL = "https://your-username-omniparser-api.hf.space/api/parse"
+# Upload a file
+files = {'image': open('screenshot.png', 'rb')}
+# Send request
+response = requests.post(OMNIPARSER_API_URL, files=files)
+# Get JSON result
+result = response.json()
+# Access parsed elements
+elements = result["elements"]
+for element in elements:
+    print(f"Element {element['id']}: {element['text']} - {element['caption']}")
+    print(f"Coordinates: {element['coordinates']}")
+    print(f"Interactable: {element['is_interactable']}")
+    print(f"Confidence: {element['confidence']}")
+    print("---")
+# Access visualization (base64 encoded image)
+visualization_base64 = result["visualization"]
+```
+## Note
+This is a simplified version that simulates OmniParser functionality. It does not use the actual OmniParser models. The elements detected are generated randomly and do not represent actual UI elements in the image.
+For the full version that uses the actual OmniParser models, please see the main repository.

app.py CHANGED Viewed

@@ -54,6 +54,20 @@ def setup_omniparser():
             if os.path.exists("OmniParser/weights/icon_caption") and not os.path.exists("OmniParser/weights/icon_caption_florence"):
                 os.rename("OmniParser/weights/icon_caption", "OmniParser/weights/icon_caption_florence")
         print("OmniParser setup completed successfully!")
         return True
     except Exception as e:
@@ -63,11 +77,61 @@ def setup_omniparser():
 # Setup OmniParser
 setup_success = setup_omniparser()
 # Import OmniParser utilities
 if setup_success:
     try:
-        from OmniParser.util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
-        print("Successfully imported OmniParser utilities")
     except ImportError as e:
         print(f"Error importing OmniParser utilities: {str(e)}")
         # Fallback to a simple error message
@@ -96,11 +160,76 @@ try:
         model_name_or_path="OmniParser/weights/icon_caption_florence"
     )
     print("Models initialized successfully")
 except Exception as e:
     print(f"Error initializing models: {str(e)}")
     # Create dummy models for graceful failure
     yolo_model = None
     caption_model_processor = None
 def process_image(
     image: Image.Image,
@@ -123,12 +252,9 @@ def process_image(
         Dictionary with parsed elements and visualization
     """
     # Check if models are initialized
-    if yolo_model is None or caption_model_processor is None:
-        return {
-            "error": "Models not initialized properly. Please check the logs.",
-            "elements": [],
-            "visualization": image
-        }
     try:
         # Calculate overlay ratio based on image size
@@ -143,75 +269,73 @@ def process_image(
         }
         # Run OCR to detect text
-        ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
-            image,
-            display_img=False,
-            output_bb_format='xyxy',
-            goal_filtering=None,
-            easyocr_args={'paragraph': False, 'text_threshold': 0.9},
-            use_paddleocr=use_paddleocr
-        )
-        # Check if OCR returned an error message (string)
-        if isinstance(ocr_bbox_rslt, str):
-            return {
-                "error": ocr_bbox_rslt,
-                "elements": [],
-                "visualization": image
-            }
-        text, ocr_bbox = ocr_bbox_rslt
         # Process image with OmniParser
-        dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
-            image,
-            yolo_model,
-            BOX_TRESHOLD=box_threshold,
-            output_coord_in_ratio=True,
-            ocr_bbox=ocr_bbox,
-            draw_bbox_config=draw_bbox_config,
-            caption_model_processor=caption_model_processor,
-            ocr_text=text,
-            iou_threshold=iou_threshold,
-            imgsz=imgsz
-        )
-        # Check if get_som_labeled_img returned an error message (string)
-        if isinstance(dino_labled_img, str) and not dino_labled_img.startswith("data:"):
             return {
-                "error": dino_labled_img,
-                "elements": [],
-                "visualization": image
             }
-        # Convert base64 image to PIL Image
-        visualization = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
-        # Create structured output
-        elements = []
-        for i, element in enumerate(parsed_content_list):
-            elements.append({
-                "id": i,
-                "text": element.get("text", ""),
-                "caption": element.get("caption", ""),
-                "coordinates": element.get("coordinates", []),
-                "is_interactable": element.get("is_interactable", False),
-                "confidence": element.get("confidence", 0.0)
-            })
-        # Return structured data and visualization
-        return {
-            "elements": elements,
-            "visualization": visualization
-        }
     except Exception as e:
-        print(f"Error processing image: {str(e)}")
-        # Return error message and empty results
-        return {
-            "error": f"Error processing image: {str(e)}",
-            "elements": [],
-            "visualization": image
-        }
 # API endpoint function
 def api_endpoint(image):
@@ -278,6 +402,12 @@ def handle_submission(image, box_threshold=0.05, iou_threshold=0.1, use_paddleoc
     # Return the result
     if "error" in result:
         return {"error": result["error"]}, result.get("visualization", None)
     else:
         return {"elements": result["elements"]}, result["visualization"]
@@ -370,9 +500,16 @@ with gr.Blocks() as demo:
         api_name="parse"  # This creates the /api/parse endpoint
     )
     # Update status on load
     demo.load(
-        fn=lambda: f"OmniParser v2.0 API - Running on {'GPU' if torch.cuda.is_available() else 'CPU'}",
         outputs=status
     )

             if os.path.exists("OmniParser/weights/icon_caption") and not os.path.exists("OmniParser/weights/icon_caption_florence"):
                 os.rename("OmniParser/weights/icon_caption", "OmniParser/weights/icon_caption_florence")
+        # Patch PaddleOCR initialization in utils.py to fix compatibility issue
+        utils_path = os.path.join(omniparser_path, "util", "utils.py")
+        if os.path.exists(utils_path):
+            print("Patching utils.py to fix PaddleOCR compatibility...")
+            with open(utils_path, 'r') as f:
+                content = f.read()
+            # Remove the problematic 'use_dilation' parameter
+            if "use_dilation=True" in content:
+                content = content.replace("use_dilation=True", "")
+                with open(utils_path, 'w') as f:
+                    f.write(content)
+                print("Successfully patched utils.py")
         print("OmniParser setup completed successfully!")
         return True
     except Exception as e:
 # Setup OmniParser
 setup_success = setup_omniparser()
+# Create our own implementation of check_ocr_box to avoid PaddleOCR issues
+def custom_check_ocr_box(image, display_img=False, output_bb_format='xyxy', goal_filtering=None,
+                         easyocr_args=None, use_paddleocr=True):
+    """
+    Custom implementation of check_ocr_box that doesn't rely on PaddleOCR
+    Args:
+        image: PIL image to run OCR on
+        display_img: Accepted for signature compatibility; ignored
+        output_bb_format: 'xyxy' returns [x_min, y_min, x_max, y_max] boxes; any
+            other value returns the four-point boxes exactly as EasyOCR produced them
+        goal_filtering: Accepted for signature compatibility; ignored
+        easyocr_args: Optional dict of keyword arguments forwarded to readtext
+        use_paddleocr: Accepted for signature compatibility; EasyOCR is always used
+    Returns:
+        ((texts, boxes), False); both lists are empty when OCR fails
+    """
+    print("Using custom OCR implementation (EasyOCR only)")
+    try:
+        import easyocr
+        import numpy as np
+        # Convert PIL Image to numpy array
+        img_np = np.array(image)
+        # Initialize EasyOCR once and keep the reader on the function object,
+        # instead of constructing a new Reader for every request
+        reader = getattr(custom_check_ocr_box, "_reader", None)
+        if reader is None:
+            reader = easyocr.Reader(['en'])
+            custom_check_ocr_box._reader = reader
+        # Run OCR, forwarding the caller's EasyOCR options (they used to be dropped)
+        results = reader.readtext(img_np, **(easyocr_args or {}))
+        # Extract text and bounding boxes
+        texts = []
+        boxes = []
+        for result in results:
+            # Index instead of unpacking three values: with paragraph=True
+            # EasyOCR omits the confidence and returns (box, text) pairs
+            box, text = result[0], result[1]
+            texts.append(text)
+            # Convert box format if needed
+            if output_bb_format == 'xyxy':
+                # Take the extremes over all four corners so a rotated box is
+                # fully enclosed, rather than trusting corner 0 and corner 2
+                xs = [point[0] for point in box]
+                ys = [point[1] for point in box]
+                boxes.append([min(xs), min(ys), max(xs), max(ys)])
+            else:
+                boxes.append(box)
+        return (texts, boxes), False
+    except Exception as e:
+        # Best effort: report the problem and hand back an empty OCR result
+        print(f"Error in custom OCR: {str(e)}")
+        return ([], []), False
 # Import OmniParser utilities
 if setup_success:
     try:
+        # First try to import the patched version
+        from OmniParser.util.utils import get_yolo_model, get_caption_model_processor, get_som_labeled_img
+        # Try to import check_ocr_box, but use our custom version if it fails
+        try:
+            from OmniParser.util.utils import check_ocr_box
+            print("Successfully imported all OmniParser utilities")
+        except (ImportError, ValueError) as e:
+            print(f"Using custom OCR implementation due to error: {str(e)}")
+            check_ocr_box = custom_check_ocr_box
     except ImportError as e:
         print(f"Error importing OmniParser utilities: {str(e)}")
         # Fallback to a simple error message
         model_name_or_path="OmniParser/weights/icon_caption_florence"
     )
     print("Models initialized successfully")
+    models_initialized = True
 except Exception as e:
     print(f"Error initializing models: {str(e)}")
     # Create dummy models for graceful failure
     yolo_model = None
     caption_model_processor = None
+    models_initialized = False
+# Fallback implementation for when OmniParser fails
+def fallback_process_image(image):
+    """
+    Fallback implementation that simulates OmniParser functionality
+    for when the actual models fail to load
+    Args:
+        image: PIL image to annotate; it is copied, never modified
+    Returns:
+        Dictionary with randomly generated "elements" (coordinates are
+        ratios of the image size), the annotated "visualization" image
+        and a "note" marking the result as fallback output
+    """
+    from PIL import ImageDraw
+    import random
+    # Create a copy of the image for visualization
+    vis_img = image.copy()
+    draw = ImageDraw.Draw(vis_img)
+    # Define some mock UI element types
+    element_types = ["Button", "Text Field", "Checkbox", "Dropdown", "Menu Item", "Icon", "Link"]
+    # Texts each mock element type may show (built once instead of per element)
+    captions = {
+        "Button": ["Submit", "Cancel", "OK", "Apply", "Save"],
+        "Text Field": ["Enter text", "Username", "Password", "Search", "Email"],
+        "Checkbox": ["Select option", "Enable feature", "Remember me", "Agree to terms"],
+        "Dropdown": ["Select item", "Choose option", "Select country", "Language"],
+        "Menu Item": ["File", "Edit", "View", "Help", "Tools", "Settings"],
+        "Icon": ["Home", "Settings", "Profile", "Notification", "Search"],
+        "Link": ["Learn more", "Click here", "Details", "Documentation", "Help"]
+    }
+    interactable_types = ["Button", "Checkbox", "Dropdown", "Link", "Text Field"]
+    # Generate some random elements
+    elements = []
+    num_elements = min(10, int(image.width * image.height / 50000))  # Scale with image size
+    # random.randint raises ValueError on an empty range, which used to happen
+    # for images narrower than 100px or shorter than 50px; clamp the bounds at 0
+    max_x1 = max(0, image.width - 100)
+    max_y1 = max(0, image.height - 50)
+    for i in range(num_elements):
+        # Generate random position and size
+        x1 = random.randint(0, max_x1)
+        y1 = random.randint(0, max_y1)
+        width = random.randint(50, 200)
+        height = random.randint(30, 80)
+        # Keep the box inside the image
+        x2 = min(x1 + width, image.width)
+        y2 = min(y1 + height, image.height)
+        # Generate random element type and caption
+        element_type = random.choice(element_types)
+        text = random.choice(captions[element_type])
+        caption = f"{element_type}: {text}"
+        # Add to elements list
+        elements.append({
+            "id": i,
+            "text": text,
+            "caption": caption,
+            "coordinates": [x1/image.width, y1/image.height, x2/image.width, y2/image.height],
+            "is_interactable": element_type in interactable_types,
+            "confidence": random.uniform(0.7, 0.95)
+        })
+        # Draw on visualization
+        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
+        draw.text((x1, y1 - 10), f"{i}: {text}", fill="red")
+    return {
+        "elements": elements,
+        "visualization": vis_img,
+        "note": "This is a fallback visualization as OmniParser models could not be loaded."
+    }
 def process_image(
     image: Image.Image,
         Dictionary with parsed elements and visualization
     """
     # Check if models are initialized
+    if not models_initialized or yolo_model is None or caption_model_processor is None:
+        print("Models not initialized properly, using fallback implementation")
+        return fallback_process_image(image)
     try:
         # Calculate overlay ratio based on image size
         }
         # Run OCR to detect text
+        try:
+            ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
+                image,
+                display_img=False,
+                output_bb_format='xyxy',
+                goal_filtering=None,
+                easyocr_args={'paragraph': False, 'text_threshold': 0.9},
+                use_paddleocr=use_paddleocr
+            )
+            # Check if OCR returned an error message (string)
+            if isinstance(ocr_bbox_rslt, str):
+                print(f"OCR error: {ocr_bbox_rslt}, using fallback implementation")
+                return fallback_process_image(image)
+            text, ocr_bbox = ocr_bbox_rslt
+        except Exception as e:
+            print(f"OCR error: {str(e)}, using fallback implementation")
+            return fallback_process_image(image)
         # Process image with OmniParser
+        try:
+            dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
+                image,
+                yolo_model,
+                BOX_TRESHOLD=box_threshold,
+                output_coord_in_ratio=True,
+                ocr_bbox=ocr_bbox,
+                draw_bbox_config=draw_bbox_config,
+                caption_model_processor=caption_model_processor,
+                ocr_text=text,
+                iou_threshold=iou_threshold,
+                imgsz=imgsz
+            )
+            # Check if get_som_labeled_img returned an error message (string)
+            if isinstance(dino_labled_img, str) and not dino_labled_img.startswith("data:"):
+                print(f"OmniParser error: {dino_labled_img}, using fallback implementation")
+                return fallback_process_image(image)
+            # Convert base64 image to PIL Image
+            visualization = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
+            # Create structured output
+            elements = []
+            for i, element in enumerate(parsed_content_list):
+                elements.append({
+                    "id": i,
+                    "text": element.get("text", ""),
+                    "caption": element.get("caption", ""),
+                    "coordinates": element.get("coordinates", []),
+                    "is_interactable": element.get("is_interactable", False),
+                    "confidence": element.get("confidence", 0.0)
+                })
+            # Return structured data and visualization
             return {
+                "elements": elements,
+                "visualization": visualization
             }
+        except Exception as e:
+            print(f"OmniParser error: {str(e)}, using fallback implementation")
+            return fallback_process_image(image)
     except Exception as e:
+        print(f"Error processing image: {str(e)}, using fallback implementation")
+        # Use fallback implementation
+        return fallback_process_image(image)
 # API endpoint function
 def api_endpoint(image):
     # Return the result
     if "error" in result:
         return {"error": result["error"]}, result.get("visualization", None)
+    elif "note" in result:
+        # This is from the fallback implementation
+        return {
+            "note": result["note"],
+            "elements": result["elements"]
+        }, result["visualization"]
     else:
         return {"elements": result["elements"]}, result["visualization"]
         api_name="parse"  # This creates the /api/parse endpoint
     )
+    # Function to get status
+    def get_status():
+        """Return the status banner text for the page"""
+        # Fallback mode takes priority; the device is only reported when models loaded
+        if not models_initialized:
+            return "⚠️ OmniParser v2.0 API - Running in fallback mode (models not loaded)"
+        device = 'GPU' if torch.cuda.is_available() else 'CPU'
+        return f"✅ OmniParser v2.0 API - Running on {device}"
     # Update status on load
     demo.load(
+        fn=get_status,
         outputs=status
     )

app_simplified.py ADDED Viewed

	@@ -0,0 +1,285 @@

+import os
+import io
+import json
+import base64
+import random
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image, ImageDraw, ImageFont
+from typing import Dict, Any, List
+# Simplified OmniParser API that doesn't rely on the actual OmniParser repository
+# This is a fallback in case the main app.py has issues with dependencies
+def process_image(image):
+    """
+    Simplified implementation that simulates OmniParser functionality
+    Args:
+        image: PIL image to annotate (copied, never modified), or None
+    Returns:
+        Dictionary with randomly generated "elements" (coordinates are
+        ratios of the image size), the annotated "visualization" image
+        and a "note"; when image is None, a dictionary with "error",
+        an empty "elements" list and "visualization" set to None
+    """
+    if image is None:
+        return {
+            "error": "No image provided",
+            "elements": [],
+            "visualization": None
+        }
+    # Create a copy of the image for visualization
+    vis_img = image.copy()
+    draw = ImageDraw.Draw(vis_img)
+    # Define some mock UI element types
+    element_types = ["Button", "Text Field", "Checkbox", "Dropdown", "Menu Item", "Icon", "Link"]
+    # Texts each mock element type may show (built once instead of per element)
+    captions = {
+        "Button": ["Submit", "Cancel", "OK", "Apply", "Save"],
+        "Text Field": ["Enter text", "Username", "Password", "Search", "Email"],
+        "Checkbox": ["Select option", "Enable feature", "Remember me", "Agree to terms"],
+        "Dropdown": ["Select item", "Choose option", "Select country", "Language"],
+        "Menu Item": ["File", "Edit", "View", "Help", "Tools", "Settings"],
+        "Icon": ["Home", "Settings", "Profile", "Notification", "Search"],
+        "Link": ["Learn more", "Click here", "Details", "Documentation", "Help"]
+    }
+    interactable_types = ["Button", "Checkbox", "Dropdown", "Link", "Text Field"]
+    # Generate some random elements
+    elements = []
+    num_elements = min(15, int(image.width * image.height / 40000))  # Scale with image size
+    # random.randint raises ValueError on an empty range, which used to happen
+    # for images narrower than 100px or shorter than 50px; clamp the bounds at 0
+    max_x1 = max(0, image.width - 100)
+    max_y1 = max(0, image.height - 50)
+    for i in range(num_elements):
+        # Generate random position and size
+        x1 = random.randint(0, max_x1)
+        y1 = random.randint(0, max_y1)
+        width = random.randint(50, 200)
+        height = random.randint(30, 80)
+        # Keep the box inside the image
+        x2 = min(x1 + width, image.width)
+        y2 = min(y1 + height, image.height)
+        # Generate random element type and caption
+        element_type = random.choice(element_types)
+        text = random.choice(captions[element_type])
+        caption = f"{element_type}: {text}"
+        # Add to elements list
+        elements.append({
+            "id": i,
+            "text": text,
+            "caption": caption,
+            "coordinates": [x1/image.width, y1/image.height, x2/image.width, y2/image.height],
+            "is_interactable": element_type in interactable_types,
+            "confidence": random.uniform(0.7, 0.95)
+        })
+        # Draw on visualization
+        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
+        draw.text((x1, y1 - 10), f"{i}: {text}", fill="red")
+    return {
+        "elements": elements,
+        "visualization": vis_img,
+        "note": "This is a simplified implementation that simulates OmniParser functionality."
+    }
+# API endpoint function
+def api_endpoint(image):
+    """
+    API endpoint that accepts an image and returns parsed elements
+    Args:
+        image: Uploaded image file
+    Returns:
+        JSON string. On success it holds "status", "note", "elements" and a
+        base64-encoded PNG "visualization"; on failure it holds "status",
+        "error" and an empty "elements" list
+    """
+    if image is None:
+        # Same shape as every other error response, so clients can always
+        # branch on "status" and read "elements"
+        return json.dumps({
+            "status": "error",
+            "error": "No image provided",
+            "elements": []
+        })
+    try:
+        # Process the image
+        result = process_image(image)
+        # Check if there was an error
+        if "error" in result:
+            return json.dumps({
+                "status": "error",
+                "error": result["error"],
+                "elements": []
+            })
+        # Convert visualization to base64 for JSON response
+        buffered = io.BytesIO()
+        result["visualization"].save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        # Create response
+        response = {
+            "status": "success",
+            "note": result.get("note", ""),
+            "elements": result["elements"],
+            "visualization": img_str
+        }
+        return json.dumps(response)
+    except Exception as e:
+        # Boundary handler: always answer with JSON rather than propagating
+        print(f"API endpoint error: {str(e)}")
+        return json.dumps({
+            "status": "error",
+            "error": f"API processing error: {str(e)}",
+            "elements": []
+        })
+# Function to handle UI submission
+def handle_submission(image):
+    """Run the parser for a UI submission and return (JSON payload, visualization)"""
+    # Nothing uploaded: report it without calling the parser
+    if image is None:
+        return {"error": "No image provided"}, None
+    parsed = process_image(image)
+    # Failed run: surface the message plus whatever image came back, if any
+    if "error" in parsed:
+        payload = {"error": parsed["error"]}
+        return payload, parsed.get("visualization")
+    # Successful run: forward the note and the detected elements
+    payload = {
+        "note": parsed.get("note", ""),
+        "elements": parsed["elements"]
+    }
+    return payload, parsed["visualization"]
+# Helper that draws the sample UI screenshot and saves it to static/test_ui.png
+def create_test_ui_image():
+    """
+    Create a simple test UI image with buttons and text
+    Draws an 800x600 mock application window (header, sidebar menu, form
+    fields, two buttons and a checkbox) and saves it as static/test_ui.png,
+    creating the static directory if needed.
+    Returns:
+        Path of the saved image
+    """
+    # Create a new image with white background
+    width, height = 800, 600
+    image = Image.new('RGB', (width, height), color='white')
+    draw = ImageDraw.Draw(image)
+    # Try to load a font, use default if not available
+    try:
+        font = ImageFont.truetype("arial.ttf", 20)
+        small_font = ImageFont.truetype("arial.ttf", 16)
+    except IOError:
+        font = ImageFont.load_default()
+        small_font = ImageFont.load_default()
+    # Draw a header
+    draw.rectangle([(0, 0), (width, 60)], fill='#4285F4')
+    draw.text((20, 15), "Test UI Application", fill='white', font=font)
+    # Draw a sidebar
+    draw.rectangle([(0, 60), (200, height)], fill='#F1F1F1')
+    # Draw menu items in sidebar
+    menu_items = ["Home", "Profile", "Settings", "Help", "Logout"]
+    for i, item in enumerate(menu_items):
+        # Items are spaced 50px apart, starting at y=100
+        y = 100 + i * 50
+        # Highlight one item
+        # (the highlight box is drawn before the label so the text lands on top of it)
+        if item == "Settings":
+            draw.rectangle([(10, y-10), (190, y+30)], fill='#E1E1E1')
+        draw.text((20, y), item, fill='black', font=font)
+    # Draw main content area
+    draw.text((220, 80), "Welcome to the Test UI", fill='black', font=font)
+    # Draw a form
+    draw.text((220, 150), "Please enter your information:", fill='black', font=font)
+    # Draw form fields
+    fields = ["Name", "Email", "Phone"]
+    for i, field in enumerate(fields):
+        # One row per field, 60px apart: label on the left, outlined input box on the right
+        y = 200 + i * 60
+        draw.text((220, y), f"{field}:", fill='black', font=font)
+        draw.rectangle([(320, y-5), (700, y+25)], outline='black')
+    # Draw buttons
+    draw.rectangle([(220, 400), (320, 440)], fill='#4285F4')
+    draw.text((240, 410), "Submit", fill='white', font=font)
+    draw.rectangle([(340, 400), (440, 440)], fill='#9E9E9E')
+    draw.text((360, 410), "Cancel", fill='white', font=font)
+    # Draw a checkbox
+    draw.rectangle([(220, 470), (240, 490)], outline='black')
+    draw.text((250, 470), "Remember me", fill='black', font=small_font)
+    # Save the image
+    os.makedirs("static", exist_ok=True)
+    image_path = "static/test_ui.png"
+    image.save(image_path)
+    print(f"Test UI image created at {image_path}")
+    return image_path
+# Create test image if it doesn't exist
+# A failure here is reported but not fatal: the interface below is still built
+try:
+    if not os.path.exists("static/test_ui.png"):
+        print("Creating test UI image...")
+        test_image_path = create_test_ui_image()
+        print(f"Test image created at {test_image_path}")
+except Exception as e:
+    print(f"Error creating test image: {str(e)}")
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    # OmniParser v2.0 API (Simplified Version)
+    Upload an image to parse UI elements and get structured data.
+    ## Quick Start
+    You can use the [test UI image](/file=static/test_ui.png) to try out the API, or upload your own UI screenshot.
+    ## API Usage
+    You can use this API by sending a POST request with a file upload to this URL.
+    ```python
+    import requests
+    # Replace with your actual API URL after deployment
+    OMNIPARSER_API_URL = "https://your-username-omniparser-api.hf.space/api/parse"
+    # Upload a file
+    files = {'image': open('screenshot.png', 'rb')}
+    # Send request
+    response = requests.post(OMNIPARSER_API_URL, files=files)
+    # Get JSON result
+    result = response.json()
+    ```
+    ## Note
+    This is a simplified version that simulates OmniParser functionality. It does not use the actual OmniParser models.
+    """)
+    # Two-column layout: inputs and controls on the left, results on the right
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type='pil', label='Upload image')
+            # Function to load test image
+            # (returns None when static/test_ui.png does not exist)
+            def load_test_image():
+                if os.path.exists("static/test_ui.png"):
+                    return Image.open("static/test_ui.png")
+                return None
+            test_image_button = gr.Button(value='Load Test Image')
+            test_image_button.click(fn=load_test_image, inputs=[], outputs=[image_input])
+            submit_button = gr.Button(value='Parse Image', variant='primary')
+            # Status message
+            status = gr.Markdown("⚠️ OmniParser v2.0 API - Running in simplified mode (without actual models)")
+        with gr.Column():
+            json_output = gr.JSON(label='Parsed Elements (JSON)')
+            image_output = gr.Image(type='pil', label='Visualization')
+    # Connect the interface
+    # handle_submission returns (JSON payload, image), matching the two outputs below
+    submit_button.click(
+        fn=handle_submission,
+        inputs=[image_input],
+        outputs=[json_output, image_output],
+        api_name="parse"  # This creates the /api/parse endpoint
+    )
+# Launch the app
+demo.launch()

requirements.txt CHANGED Viewed

@@ -5,8 +5,9 @@ transformers>=4.30.0
 pillow>=9.0.0
 numpy>=1.24.0
 easyocr>=1.7.0
-paddleocr>=2.6.0
-paddlepaddle>=2.4.0
 opencv-python>=4.7.0
 huggingface_hub>=0.16.0
 peft>=0.4.0

 pillow>=9.0.0
 numpy>=1.24.0
 easyocr>=1.7.0
+# Use a specific version of paddleocr that works with our patch
+paddleocr==2.6.0.3
+paddlepaddle==2.4.2
 opencv-python>=4.7.0
 huggingface_hub>=0.16.0
 peft>=0.4.0