Spaces:
Sleeping
Sleeping
| from flask import Flask, render_template_string, request, jsonify | |
| from flask_cors import CORS | |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
| import os | |
| import sys | |
| import threading | |
| import time | |
# Flask application object, with flask_cors applied to it.
app = Flask(__name__)
CORS(app)

# Model-loading state. These module globals are written by load_model_async()
# and read by the request handlers.
model_name = "openai/privacy-filter"
classifier = model_error = model_thread = None  # pipeline / last error text / loader thread
model_loading = False  # True only while load_model_async() is running
| # Background model loading | |
def load_model_async():
    """Build the token-classification pipeline and publish it in ``classifier``.

    Intended to run on a background thread. Progress is reported with
    ``print(..., flush=True)``. The outcome is recorded in module globals:

    * ``model_loading`` is True while this function runs and False afterwards.
    * ``classifier`` receives the pipeline on success.
    * ``model_error`` receives ``str(exception)`` on failure, ``None`` on success.

    Exceptions are caught, printed with a traceback and not re-raised.
    """
    global classifier, model_loading, model_error

    model_loading = True
    rule = "=" * 60
    print(rule, flush=True)
    print("BACKGROUND: Loading OpenAI Privacy Filter model...", flush=True)
    print(rule, flush=True)

    # Both downloads share the same on-disk cache directory.
    hub_cache = "/app/.cache/huggingface"
    try:
        print(f"Loading tokenizer and model: {model_name}", flush=True)
        print("This may take 5-10 minutes on first run...", flush=True)

        tok = AutoTokenizer.from_pretrained(model_name, cache_dir=hub_cache)
        net = AutoModelForTokenClassification.from_pretrained(
            model_name, cache_dir=hub_cache
        )
        classifier = pipeline(
            task="token-classification",
            model=net,
            tokenizer=tok,
            aggregation_strategy="simple",
            device=-1,  # Force CPU
        )

        print("✓ Model loaded successfully!", flush=True)
        model_error = None
    except Exception as exc:
        # Keep the message so the /health handler can report it to clients.
        model_error = str(exc)
        print(f"✗ ERROR loading model: {exc}", flush=True)
        import traceback
        traceback.print_exc()
    finally:
        model_loading = False
# Launch the model load on a daemon thread at import time, so the rest of the
# module (and the web server) does not wait for it.
model_thread = threading.Thread(
    target=load_model_async,
    daemon=True,
)
model_thread.start()
| # HTML Template with proper loading states | |
| HTML_TEMPLATE = ''' | |
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>OpenAI Privacy Filter - PII Detection Demo</title> | |
| <style> | |
| * { box-sizing: border-box; margin: 0; padding: 0; } | |
| body { | |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif; | |
| background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); | |
| min-height: 100vh; | |
| color: #fff; | |
| padding: 20px; | |
| } | |
| .container { max-width: 900px; margin: 0 auto; } | |
| h1 { | |
| text-align: center; margin-bottom: 10px; | |
| background: linear-gradient(90deg, #00d4ff, #7b2cbf); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| font-size: 2.5rem; | |
| } | |
| .subtitle { text-align: center; color: #8892b0; margin-bottom: 30px; } | |
| .card { | |
| background: rgba(255,255,255,0.05); | |
| border-radius: 12px; | |
| padding: 25px; | |
| margin-bottom: 20px; | |
| backdrop-filter: blur(10px); | |
| border: 1px solid rgba(255,255,255,0.1); | |
| } | |
| textarea { | |
| width: 100%; min-height: 150px; padding: 15px; | |
| border-radius: 8px; border: 1px solid rgba(255,255,255,0.2); | |
| background: rgba(0,0,0,0.3); color: #fff; | |
| font-size: 14px; resize: vertical; font-family: monospace; | |
| } | |
| textarea::placeholder { color: #666; } | |
| button { | |
| width: 100%; padding: 15px; margin-top: 15px; | |
| border: none; border-radius: 8px; | |
| background: linear-gradient(90deg, #00d4ff, #7b2cbf); | |
| color: #fff; font-size: 16px; font-weight: 600; | |
| cursor: pointer; transition: transform 0.2s, box-shadow 0.2s; | |
| } | |
| button:hover:not(:disabled) { | |
| transform: translateY(-2px); | |
| box-shadow: 0 5px 25px rgba(0,212,255,0.4); | |
| } | |
| button:disabled { | |
| opacity: 0.6; cursor: not-allowed; | |
| background: linear-gradient(90deg, #666, #444); | |
| } | |
| .results { display: none; } | |
| .results.active { display: block; } | |
| .result-text { | |
| background: rgba(0,0,0,0.3); padding: 20px; | |
| border-radius: 8px; font-family: monospace; | |
| line-height: 1.8; word-wrap: break-word; | |
| white-space: pre-wrap; | |
| } | |
| .entity { | |
| padding: 2px 8px; border-radius: 4px; | |
| font-weight: bold; | |
| } | |
| .entity-private_person { background: rgba(255,107,107,0.3); border: 1px solid #ff6b6b; } | |
| .entity-private_email { background: rgba(78,205,196,0.3); border: 1px solid #4ecdc4; } | |
| .entity-private_phone { background: rgba(255,209,102,0.3); border: 1px solid #ffd166; } | |
| .entity-private_address { background: rgba(6,214,160,0.3); border: 1px solid #06d6a0; } | |
| .entity-account_number { background: rgba(239,71,111,0.3); border: 1px solid #ef476f; } | |
| .entity-secret { background: rgba(255,0,110,0.3); border: 1px solid #ff006e; } | |
| .entity-private_url { background: rgba(131,56,236,0.3); border: 1px solid #8338ec; } | |
| .entity-private_date { background: rgba(58,134,255,0.3); border: 1px solid #3a86ff; } | |
| .legend { | |
| display: flex; flex-wrap: wrap; gap: 10px; | |
| margin-top: 15px; justify-content: center; | |
| } | |
| .legend-item { | |
| display: flex; align-items: center; | |
| gap: 5px; font-size: 12px; | |
| } | |
| .legend-color { | |
| width: 20px; height: 20px; | |
| border-radius: 4px; border: 1px solid; | |
| } | |
| .details-list { margin-top: 20px; } | |
| .detail-item { | |
| display: flex; justify-content: space-between; | |
| align-items: center; padding: 12px; | |
| background: rgba(255,255,255,0.03); | |
| border-radius: 6px; margin-bottom: 8px; | |
| } | |
| .detail-type { font-weight: bold; color: #00d4ff; } | |
| .detail-score { font-size: 12px; color: #8892b0; } | |
| .error-box { | |
| background: rgba(239,71,111,0.2); | |
| border: 1px solid #ef476f; | |
| padding: 15px; | |
| border-radius: 8px; | |
| margin-top: 15px; | |
| color: #ff6b6b; | |
| } | |
| .info-box { | |
| background: rgba(0,212,255,0.1); | |
| border-left: 3px solid #00d4ff; | |
| padding: 15px; margin-bottom: 20px; | |
| border-radius: 0 8px 8px 0; | |
| } | |
| .info-box h3 { margin-bottom: 5px; } | |
| .info-box ul { margin-left: 20px; color: #8892b0; } | |
| .status-indicator { | |
| display: inline-block; | |
| width: 10px; height: 10px; | |
| border-radius: 50%; | |
| margin-right: 8px; | |
| } | |
| .status-ok { background: #06d6a0; } | |
| .status-error { background: #ef476f; } | |
| .status-loading { background: #ffd166; animation: pulse 1s infinite; } | |
| .status-waiting { background: #3a86ff; } | |
| @keyframes pulse { | |
| 0%, 100% { opacity: 1; } | |
| 50% { opacity: 0.3; } | |
| } | |
| #modelStatus { | |
| text-align: center; | |
| margin-bottom: 15px; | |
| padding: 15px; | |
| background: rgba(0,0,0,0.3); | |
| border-radius: 8px; | |
| font-size: 14px; | |
| } | |
| .loading-spinner { | |
| display: inline-block; | |
| width: 20px; height: 20px; | |
| border: 3px solid rgba(255,255,255,0.3); | |
| border-top-color: #00d4ff; | |
| border-radius: 50%; | |
| animation: spin 1s linear infinite; | |
| margin-right: 10px; | |
| vertical-align: middle; | |
| } | |
| @keyframes spin { | |
| to { transform: rotate(360deg); } | |
| } | |
| .progress-bar { | |
| width: 100%; | |
| height: 4px; | |
| background: rgba(255,255,255,0.1); | |
| border-radius: 2px; | |
| margin-top: 10px; | |
| overflow: hidden; | |
| } | |
| .progress-fill { | |
| height: 100%; | |
| background: linear-gradient(90deg, #00d4ff, #7b2cbf); | |
| animation: progress 2s ease-in-out infinite; | |
| } | |
| @keyframes progress { | |
| 0% { width: 0%; transform: translateX(-100%); } | |
| 50% { width: 70%; transform: translateX(50%); } | |
| 100% { width: 0%; transform: translateX(200%); } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <h1>OpenAI Privacy Filter</h1> | |
| <p class="subtitle">PII Detection & Masking Demo using Flask</p> | |
| <div id="modelStatus"> | |
| <span id="statusIndicator" class="status-indicator status-loading"></span> | |
| <span id="statusText">Waiting for server to start...</span> | |
| <div class="progress-bar" id="progressBar"> | |
| <div class="progress-fill"></div> | |
| </div> | |
| </div> | |
| <div class="info-box"> | |
| <h3>Detects 8 Types of PII:</h3> | |
| <ul> | |
| <li><strong>private_person</strong> - Names and personal identifiers</li> | |
| <li><strong>private_email</strong> - Email addresses</li> | |
| <li><strong>private_phone</strong> - Phone numbers</li> | |
| <li><strong>private_address</strong> - Physical addresses</li> | |
| <li><strong>account_number</strong> - Account/ID numbers</li> | |
| <li><strong>secret</strong> - Passwords, tokens, credentials</li> | |
| <li><strong>private_url</strong> - Personal/private URLs</li> | |
| <li><strong>private_date</strong> - Personal dates (birthdays, etc.)</li> | |
| </ul> | |
| </div> | |
| <div class="card"> | |
| <textarea id="inputText" placeholder="Enter text with PII here...\n\nExample: My name is Alice Smith and my email is alice.smith@example.com. You can reach me at (555) 123-4567 or visit me at 123 Main Street, New York. My SSN is 123-45-6789."></textarea> | |
| <button onclick="analyzeText()" id="analyzeBtn" disabled>Waiting for model...</button> | |
| <div id="errorBox" class="error-box" style="display: none;"></div> | |
| </div> | |
| <div class="card results" id="resultsCard"> | |
| <h3 style="margin-bottom: 15px;">Results</h3> | |
| <div class="result-text" id="resultDisplay"></div> | |
| <div class="legend"> | |
| <div class="legend-item"><div class="legend-color entity-private_person"></div> Person</div> | |
| <div class="legend-item"><div class="legend-color entity-private_email"></div> Email</div> | |
| <div class="legend-item"><div class="legend-color entity-private_phone"></div> Phone</div> | |
| <div class="legend-item"><div class="legend-color entity-private_address"></div> Address</div> | |
| <div class="legend-item"><div class="legend-color entity-account_number"></div> Account</div> | |
| <div class="legend-item"><div class="legend-color entity-secret"></div> Secret</div> | |
| <div class="legend-item"><div class="legend-color entity-private_url"></div> URL</div> | |
| <div class="legend-item"><div class="legend-color entity-private_date"></div> Date</div> | |
| </div> | |
| <div class="details-list" id="detailsList"></div> | |
| </div> | |
| </div> | |
| <script> | |
| let statusCheckInterval = null; | |
| let isModelLoaded = false; | |
| let retryCount = 0; | |
| const maxRetries = 200; // 16 minutes of retrying (200 * 5 seconds) | |
function updateStatus(state, message) {
    // Switch the status banner, the analyze button and the progress bar to one of
    // four states: 'connecting', 'loading', 'ready' or 'error'. Any other state is
    // ignored. NOTE: `message` is inserted as HTML, so callers must escape
    // untrusted text before passing it in.
    const spinner = '<span class="loading-spinner"></span>';
    const looks = {
        connecting: {
            dot: "status-waiting",
            html: () => `${spinner}${message}`,
            locked: true,
            caption: "Server is starting up...",
            bar: "block"
        },
        loading: {
            dot: "status-loading",
            html: () => `${spinner}${message}`,
            locked: true,
            caption: "Model is loading...",
            bar: "block"
        },
        ready: {
            dot: "status-ok",
            html: () => "✓ " + message,
            locked: false,
            caption: "Detect PII",
            bar: "none"
        },
        error: {
            dot: "status-error",
            html: () => "✗ " + message,
            locked: true,
            caption: "Model unavailable",
            bar: "none"
        }
    };
    if (!Object.prototype.hasOwnProperty.call(looks, state)) {
        return;
    }
    const look = looks[state];
    const analyzeButton = document.getElementById("analyzeBtn");

    document.getElementById("statusIndicator").className = "status-indicator " + look.dot;
    document.getElementById("statusText").innerHTML = look.html();
    analyzeButton.disabled = look.locked;
    analyzeButton.textContent = look.caption;
    document.getElementById("progressBar").style.display = look.bar;
}
| // Check model status on page load and keep polling | |
async function checkModelStatus() {
    // Poll /health once and update the banner. While the server is unreachable or
    // the model is still loading, a 5-second polling interval is (re)armed; it is
    // cleared once the model is ready, has failed, or maxRetries is exceeded.
    retryCount++;
    if (retryCount > maxRetries) {
        clearInterval(statusCheckInterval);
        statusCheckInterval = null;
        // Give up and offer a reload button (updateStatus inserts the message as HTML).
        updateStatus('error', 'Server did not respond. <button onclick="location.reload()">Refresh Page</button>');
        return;
    }
    try {
        const response = await fetch("/health", {
            method: "GET",
            headers: { "Cache-Control": "no-cache" }
        });
        // The backend reports a failed model load as HTTP 503 with a JSON body, so
        // a 503 must be parsed instead of being treated as "server unreachable".
        if (!response.ok && response.status !== 503) {
            throw new Error(`HTTP ${response.status}`);
        }
        const data = await response.json();
        if (!response.ok && typeof data.model_loaded !== "boolean") {
            // A 503 that does not carry the health payload (for example one produced
            // by a proxy in front of the app) still means "not up yet".
            throw new Error(`HTTP ${response.status}`);
        }
        console.log("Health check response:", data);
        if (data.model_loading) {
            // Still loading
            updateStatus('loading', `Model loading initialized... (5-10 minutes on first run)`);
            if (!statusCheckInterval) {
                statusCheckInterval = setInterval(checkModelStatus, 5000);
            }
            isModelLoaded = false;
        } else if (data.model_loaded) {
            // Model ready
            updateStatus('ready', 'Model loaded and ready');
            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
            }
            isModelLoaded = true;
            retryCount = 0;
        } else {
            // Model failed. The error text comes from a server-side exception and is
            // rendered through innerHTML, so escape it first.
            const reason = escapeHtml(data.error || "Unknown error");
            updateStatus('error', `Model failed: ${reason}`);
            const errorBox = document.getElementById("errorBox");
            errorBox.style.display = "block";
            errorBox.innerHTML = `<strong>Error:</strong> ${reason}`;
            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
            }
            isModelLoaded = false;
        }
    } catch (error) {
        console.error("Health check failed:", error);
        // Server not ready yet, show connecting state
        updateStatus('connecting', `Waiting for server to start... (attempt ${retryCount})`);
        if (!statusCheckInterval) {
            statusCheckInterval = setInterval(checkModelStatus, 5000);
        }
    }
}
| // Start checking immediately with connecting state | |
| checkModelStatus(); | |
async function analyzeText() {
    // Send the textarea content to /analyze and render the detected entities.
    // Errors are shown in the error box and the results card is hidden.
    const text = document.getElementById("inputText").value;
    const btn = document.getElementById("analyzeBtn");
    const resultsCard = document.getElementById("resultsCard");
    const errorBox = document.getElementById("errorBox");

    // The Ctrl+Enter shortcut calls this function directly, so the disabled
    // button alone does not prevent a request while the model is unavailable
    // or while a previous request is still in flight.
    if (btn.disabled) {
        return;
    }
    if (!text.trim()) {
        errorBox.style.display = "block";
        errorBox.textContent = "Please enter some text first!";
        return;
    }
    btn.disabled = true;
    btn.innerHTML = '<span class="loading-spinner"></span>Analyzing...';
    errorBox.style.display = "none";
    try {
        const response = await fetch("/analyze", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ text: text })
        });
        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            // Non-JSON body (for example an HTML error page): report the status
            // code instead of a JSON parser message.
            throw new Error(`Server returned HTTP ${response.status}`);
        }
        if (!response.ok || !data || !data.success) {
            throw new Error((data && data.error) || "Server error");
        }
        displayResults(data, text);
        resultsCard.classList.add("active");
    } catch (error) {
        console.error("Error during analysis:", error);
        errorBox.style.display = "block";
        errorBox.textContent = "Error: " + error.message;
        resultsCard.classList.remove("active");
    } finally {
        // Leave the button disabled if the model is not (or no longer) loaded.
        if (isModelLoaded) {
            btn.disabled = false;
            btn.textContent = "Detect PII";
        }
    }
}
function displayResults(data, originalText) {
    // Render `originalText` with every detected entity wrapped in a coloured
    // span, followed by a per-entity detail list. `data.entities` items carry
    // label, text, start, end and score.
    const entities = Array.isArray(data.entities) ? data.entities.slice() : [];
    entities.sort((a, b) => a.start - b.start);
    const detailsList = document.getElementById("detailsList");
    let html = "";

    if (entities.length > 0) {
        // Offsets come from the Python backend, where string indices count code
        // points, while JS strings are indexed by UTF-16 code units. Slicing an
        // array of code points keeps highlights aligned when the text contains
        // emoji or other characters outside the Basic Multilingual Plane.
        const chars = Array.from(originalText);
        const cut = (from, to) => escapeHtml(chars.slice(from, to).join(""));
        let lastEnd = 0;
        for (const entity of entities) {
            const usable = Number.isInteger(entity.start) && Number.isInteger(entity.end)
                && entity.start >= lastEnd
                && entity.end > entity.start
                && entity.end <= chars.length;
            if (!usable) {
                // Overlapping or out-of-range spans would duplicate or drop text.
                continue;
            }
            // Only characters that are safe inside a class attribute are kept.
            const cssLabel = String(entity.label).replace(/[^A-Za-z0-9_-]/g, "");
            html += cut(lastEnd, entity.start);
            html += `<span class="entity entity-${cssLabel}">${cut(entity.start, entity.end)}</span>`;
            lastEnd = entity.end;
        }
        html += cut(lastEnd);

        const detailsHtml = entities.map(e => `
<div class="detail-item">
<div>
<span class="detail-type">${escapeHtml(e.label)}</span>: ${escapeHtml(e.text)}
</div>
<div class="detail-score">Score: ${(e.score * 100).toFixed(2)}%</div>
</div>
`).join("");
        detailsList.innerHTML = "<h4 style='margin:20px 0 10px 0;'>Detected Entities:</h4>" + detailsHtml;
    } else {
        html = escapeHtml(originalText) + "\\n\\n[No PII detected]";
        detailsList.innerHTML = "";
    }
    document.getElementById("resultDisplay").innerHTML = html;
}
function escapeHtml(text) {
    // Escape `text` for insertion into HTML by letting the browser serialise it:
    // assign it as text content of a detached element and read back the markup.
    const scratch = document.createElement("div");
    scratch.textContent = text;
    const escaped = scratch.innerHTML;
    return escaped;
}
| // Cleanup on page unload | |
| window.addEventListener("beforeunload", () => { | |
| if (statusCheckInterval) { | |
| clearInterval(statusCheckInterval); | |
| } | |
| }); | |
| // Add keyboard shortcut (Ctrl+Enter to analyze) | |
| document.addEventListener('DOMContentLoaded', () => { | |
| document.getElementById('inputText').addEventListener('keydown', function(e) { | |
| if (e.ctrlKey && e.key === 'Enter') { | |
| analyzeText(); | |
| } | |
| }); | |
| }); | |
| </script> | |
| </body> | |
| </html> | |
| ''' | |
@app.route('/')
def index():
    """Serve the single-page demo UI rendered from ``HTML_TEMPLATE``."""
    # Without the route registration above, Flask never dispatches to this view.
    return render_template_string(HTML_TEMPLATE)
@app.route('/health')
def health():
    """Report the model-loading state as JSON.

    Returns:
        * 200 with ``model_loaded: True`` once the classifier is available.
        * 200 with ``model_loading: True`` while the loader is running.
        * 503 with ``error`` set when loading failed or the loader thread ended
          without producing a classifier.
    """
    if classifier is not None:
        return jsonify({
            'status': 'healthy',
            'model_loaded': True,
            'model_loading': False
        })

    # The loader sets ``model_loading`` from inside its own thread, so there is a
    # short window after start-up where the thread is alive but the flag is still
    # False. Treat that as "loading" rather than as a failure.
    loader_starting = (
        model_error is None
        and model_thread is not None
        and model_thread.is_alive()
    )
    if model_loading or loader_starting:
        return jsonify({
            'status': 'loading',
            'model_loaded': False,
            'model_loading': True,
            'message': 'Model is still loading, please wait...'
        })

    # Model failed or thread died
    return jsonify({
        'status': 'unhealthy',
        'model_loaded': False,
        'model_loading': False,
        'error': model_error or 'Model loading failed or thread terminated unexpectedly'
    }), 503
@app.route('/analyze', methods=['POST', 'OPTIONS'])
def analyze():
    """Run the classifier on the posted text and return the entities as JSON.

    Expects a JSON object with a string ``text`` field.

    Returns:
        * 204 with an empty body for ``OPTIONS`` requests.
        * 503 when the model is not loaded.
        * 400 when the body is not a non-empty JSON object or ``text`` is not a
          string.
        * 200 with ``success``, ``entities`` and ``entity_count`` otherwise
          (an empty list for blank text).
        * 500 with the exception message when classification fails.
    """
    if request.method == 'OPTIONS':
        return '', 204

    if classifier is None:
        state = "loading" if model_loading else "failed"
        return jsonify({
            'success': False,
            'error': f'Model not yet loaded. Current status: {state}. Please wait and refresh in a few minutes.'
        }), 503

    # silent=True yields None for a missing, malformed or non-JSON body, so
    # client mistakes are answered with 400 instead of surfacing as 500.
    data = request.get_json(silent=True)
    if not data or not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'No JSON data received'}), 400

    text = data.get('text', '')
    if not isinstance(text, str):
        return jsonify({'success': False, 'error': '"text" must be a string'}), 400
    if not text.strip():
        return jsonify({'success': True, 'entities': [], 'entity_count': 0})

    try:
        # Run classification
        results = classifier(text)
        entities = [
            {
                'label': entity.get('entity_group', entity.get('entity', 'unknown')),
                'text': entity.get('word', ''),
                # Fall back to 0 when the pipeline supplies no offset (None).
                'start': entity.get('start') or 0,
                'end': entity.get('end') or 0,
                'score': float(entity.get('score', 0)),
            }
            for entity in results
        ]
    except Exception as e:
        print(f"Error during analysis: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500

    return jsonify({
        'success': True,
        'entities': entities,
        'entity_count': len(entities)
    })
if __name__ == '__main__':
    # Script entry point: listen on all interfaces, on the port given by the
    # PORT environment variable (7860 when unset), with threaded request handling.
    app.run(
        host='0.0.0.0',
        port=int(os.environ.get('PORT', 7860)),
        debug=False,
        threaded=True,
    )