docling-processor / test-scripts /test_docling.py
arjunbhargav212's picture
Upload 12 files
ad5d213 verified
"""
Test Docling Hugging Face API
Usage: python test_docling.py <HF_API_URL>
"""
import sys
import requests
import json
# The API base URL is a mandatory first argument; without it, show usage
# and stop with an error status.
try:
    raw_url = sys.argv[1]
except IndexError:
    print("Usage: python test_docling.py <HF_API_URL>")
    print("Example: python test_docling.py https://your-username-docling.hf.space")
    raise SystemExit(1)

# Trailing slashes are dropped so endpoint paths can be appended with "/".
HF_URL = raw_url.rstrip('/')

# Opening banner: a 60-character rule above and below the target URL.
for banner_line in (
    f"\n{'='*60}",
    f"Testing Docling API: {HF_URL}",
    f"{'='*60}\n",
):
    print(banner_line)
# Test 1: Health check
# GET the API root and require a successful HTTP status plus a JSON body.
# Any failure here aborts the script with exit status 1, because the later
# steps cannot work against an unreachable or unhealthy service.
print("1. Testing health check...")
try:
    # Bounded wait: without a timeout, requests blocks indefinitely when the
    # host accepts the connection but never answers.
    resp = requests.get(f"{HF_URL}/", timeout=30)
    print(f" Status: {resp.status_code}")
    # A 4xx/5xx answer is a failed health check even if it carries JSON.
    resp.raise_for_status()
    print(f" Response: {resp.json()}")
    print(" ✅ Health check passed!\n")
except Exception as e:
    # Top-level boundary of a smoke test: report whatever went wrong
    # (network error, HTTP error status, non-JSON body) and stop.
    print(f" ❌ Failed: {e}\n")
    sys.exit(1)
# Test 2: Check if PDF file exists
import os
# Sample document uploaded by the conversion step, looked up relative to the
# current working directory.
test_pdf = "test.pdf"

# Without the sample there is nothing to upload: tell the user how to proceed
# and stop with status 0 (treated as a skip, not as a failure).
pdf_missing = not os.path.exists(test_pdf)
if pdf_missing:
    print(
        f"⚠️ No test.pdf found. Please add a test PDF to this directory.",
        f" Or create a simple test: {HF_URL}/docs",
        sep="\n",
    )
    raise SystemExit(0)
# Test 3: Full conversion (reported to the user as step 2)
# Upload the sample PDF to the /convert endpoint as a multipart form field
# named "file" and print a short summary of the JSON answer. The keys read
# below (file_name, document.tables_count, document.num_pages,
# document.tables[].rows) are the ones this script expects; anything missing
# falls back to a default instead of raising.
print(f"2. Testing full document conversion with {test_pdf}...")
conversion_ok = False
try:
    with open(test_pdf, 'rb') as f:
        resp = requests.post(
            f"{HF_URL}/convert",
            files={"file": f},
            timeout=120,
        )
    print(f" Status: {resp.status_code}")
    if resp.status_code == 200:
        data = resp.json()
        # "document" may be absent or null; normalise once so the lookups
        # below never hit None.
        document = data.get('document') or {}
        print(" ✅ Success!")
        print(f" File: {data.get('file_name')}")
        print(f" Tables: {document.get('tables_count', 0)}")
        print(f" Pages: {document.get('num_pages', 0)}")
        # Show the first three rows of the first table, if there is one.
        tables = document.get('tables') or []
        if tables:
            print("\n First table preview:")
            for row in (tables[0].get('rows') or [])[:3]:
                print(f" {row}")
        conversion_ok = True
    else:
        print(f" ❌ Failed: {resp.text}\n")
except Exception as e:
    # Top-level boundary of a smoke test: report any failure (unreadable
    # file, network error, malformed JSON) rather than showing a traceback.
    print(f" ❌ Failed: {e}\n")

# A failed conversion must not look like success to the calling shell or CI;
# exit non-zero, as the health check does on failure.
if not conversion_ok:
    sys.exit(1)
# Closing banner: a 60-character rule above and below the completion message,
# emitted in one call with one item per line.
print(f"\n{'='*60}", "Test complete!", f"{'='*60}\n", sep="\n")