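# NOTE: the next three lines are page-scrape residue from the hosting site,
# kept as comments because they are not part of the program:
# broadfield-dev's picture
# Upload 3 files
# caa2485 verified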
from flask import Flask, render_template_string, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import os
import sys
import threading
import time
# Flask application object; CORS(app) applies flask-cors with its default settings.
app = Flask(__name__)
CORS(app)
# Model loading state, shared between the background loader and the request handlers.
# NOTE(review): these are plain module globals with no lock; load_model_async()
# updates them by simple rebinding. Confirm that is sufficient before calling
# this "thread-safe".
model_name = "openai/privacy-filter"  # model id passed to from_pretrained()
classifier = None  # token-classification pipeline; stays None until a load succeeds
model_loading = False  # True while load_model_async() is running
model_error = None  # str(exception) recorded by a failed load, otherwise None
model_thread = None  # rebound to the loader threading.Thread where it is started
# Background model loading
def load_model_async():
    """Load the token-classification pipeline and publish it via module globals.

    Meant to be used as a thread target so the web server can answer requests
    while the tokenizer and model are being loaded. Progress is printed to
    stdout with ``flush=True``.

    Module globals written:
        model_loading: set to True on entry and reset to False in ``finally``,
            so it is cleared on failure as well as on success.
        classifier: rebound to the ready pipeline on success; left unchanged
            if loading fails.
        model_error: None after a successful load, ``str(exception)`` after a
            failed one.

    Exceptions derived from ``Exception`` are caught, recorded in
    ``model_error`` and printed with a traceback; they are not re-raised.
    """
    # One declaration covers every assignment below (the original repeated
    # ``global classifier`` in the middle of the function).
    global classifier, model_loading, model_error
    model_loading = True

    banner = "=" * 60
    print(banner, flush=True)
    print("BACKGROUND: Loading OpenAI Privacy Filter model...", flush=True)
    print(banner, flush=True)

    # Tokenizer and model must share one cache location; name it once.
    cache_dir = "/app/.cache/huggingface"
    try:
        print(f"Loading tokenizer and model: {model_name}", flush=True)
        print("This may take 5-10 minutes on first run...", flush=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
        model = AutoModelForTokenClassification.from_pretrained(
            model_name,
            cache_dir=cache_dir,
        )
        # Rebinding the global is what makes the model visible to the routes.
        classifier = pipeline(
            task="token-classification",
            model=model,
            tokenizer=tokenizer,
            aggregation_strategy="simple",
            device=-1,  # Force CPU
        )
        print("✓ Model loaded successfully!", flush=True)
        model_error = None
    except Exception as e:
        model_error = str(e)
        print(f"✗ ERROR loading model: {e}", flush=True)
        import traceback
        traceback.print_exc()
    finally:
        model_loading = False
# Start model loading in background.
# Mark the load as in progress *before* the thread runs: load_model_async()
# only sets model_loading once it starts executing, so without this a /health
# request arriving in between would see "no classifier, not loading" and
# report a failure.
model_loading = True
# daemon=True so an unfinished load does not keep the process alive at exit.
model_thread = threading.Thread(target=load_model_async, daemon=True)
model_thread.start()
# Single-page UI (HTML + CSS + JS) served by index().
#
# Notes for maintainers:
# * index() passes this text through render_template_string(), so it is parsed
#   as a Jinja template: keep the CSS/JS free of "{{", "{%" and "{#".
# * This is a normal (non-raw) Python string: "\n" in the textarea placeholder
#   becomes a real line break, and "\\n" reaches the browser as the JS escape
#   "\n".
# * The page polls /health every 5 seconds until the model is ready or failed.
#   /health answers 503 with a JSON body when loading failed, so the script
#   reads that body instead of treating every non-2xx reply as "server down".
# * Server-supplied strings (error text, entity labels) are escaped before
#   they are placed into innerHTML.
HTML_TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenAI Privacy Filter - PII Detection Demo</title>
    <style>
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
            min-height: 100vh;
            color: #fff;
            padding: 20px;
        }
        .container { max-width: 900px; margin: 0 auto; }
        h1 {
            text-align: center; margin-bottom: 10px;
            background: linear-gradient(90deg, #00d4ff, #7b2cbf);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            font-size: 2.5rem;
        }
        .subtitle { text-align: center; color: #8892b0; margin-bottom: 30px; }
        .card {
            background: rgba(255,255,255,0.05);
            border-radius: 12px;
            padding: 25px;
            margin-bottom: 20px;
            backdrop-filter: blur(10px);
            border: 1px solid rgba(255,255,255,0.1);
        }
        textarea {
            width: 100%; min-height: 150px; padding: 15px;
            border-radius: 8px; border: 1px solid rgba(255,255,255,0.2);
            background: rgba(0,0,0,0.3); color: #fff;
            font-size: 14px; resize: vertical; font-family: monospace;
        }
        textarea::placeholder { color: #666; }
        button {
            width: 100%; padding: 15px; margin-top: 15px;
            border: none; border-radius: 8px;
            background: linear-gradient(90deg, #00d4ff, #7b2cbf);
            color: #fff; font-size: 16px; font-weight: 600;
            cursor: pointer; transition: transform 0.2s, box-shadow 0.2s;
        }
        button:hover:not(:disabled) {
            transform: translateY(-2px);
            box-shadow: 0 5px 25px rgba(0,212,255,0.4);
        }
        button:disabled {
            opacity: 0.6; cursor: not-allowed;
            background: linear-gradient(90deg, #666, #444);
        }
        .results { display: none; }
        .results.active { display: block; }
        .result-text {
            background: rgba(0,0,0,0.3); padding: 20px;
            border-radius: 8px; font-family: monospace;
            line-height: 1.8; word-wrap: break-word;
            white-space: pre-wrap;
        }
        .entity {
            padding: 2px 8px; border-radius: 4px;
            font-weight: bold;
        }
        .entity-private_person { background: rgba(255,107,107,0.3); border: 1px solid #ff6b6b; }
        .entity-private_email { background: rgba(78,205,196,0.3); border: 1px solid #4ecdc4; }
        .entity-private_phone { background: rgba(255,209,102,0.3); border: 1px solid #ffd166; }
        .entity-private_address { background: rgba(6,214,160,0.3); border: 1px solid #06d6a0; }
        .entity-account_number { background: rgba(239,71,111,0.3); border: 1px solid #ef476f; }
        .entity-secret { background: rgba(255,0,110,0.3); border: 1px solid #ff006e; }
        .entity-private_url { background: rgba(131,56,236,0.3); border: 1px solid #8338ec; }
        .entity-private_date { background: rgba(58,134,255,0.3); border: 1px solid #3a86ff; }
        .legend {
            display: flex; flex-wrap: wrap; gap: 10px;
            margin-top: 15px; justify-content: center;
        }
        .legend-item {
            display: flex; align-items: center;
            gap: 5px; font-size: 12px;
        }
        .legend-color {
            width: 20px; height: 20px;
            border-radius: 4px; border: 1px solid;
        }
        .details-list { margin-top: 20px; }
        .detail-item {
            display: flex; justify-content: space-between;
            align-items: center; padding: 12px;
            background: rgba(255,255,255,0.03);
            border-radius: 6px; margin-bottom: 8px;
        }
        .detail-type { font-weight: bold; color: #00d4ff; }
        .detail-score { font-size: 12px; color: #8892b0; }
        .error-box {
            background: rgba(239,71,111,0.2);
            border: 1px solid #ef476f;
            padding: 15px;
            border-radius: 8px;
            margin-top: 15px;
            color: #ff6b6b;
        }
        .info-box {
            background: rgba(0,212,255,0.1);
            border-left: 3px solid #00d4ff;
            padding: 15px; margin-bottom: 20px;
            border-radius: 0 8px 8px 0;
        }
        .info-box h3 { margin-bottom: 5px; }
        .info-box ul { margin-left: 20px; color: #8892b0; }
        .status-indicator {
            display: inline-block;
            width: 10px; height: 10px;
            border-radius: 50%;
            margin-right: 8px;
        }
        .status-ok { background: #06d6a0; }
        .status-error { background: #ef476f; }
        .status-loading { background: #ffd166; animation: pulse 1s infinite; }
        .status-waiting { background: #3a86ff; }
        @keyframes pulse {
            0%, 100% { opacity: 1; }
            50% { opacity: 0.3; }
        }
        #modelStatus {
            text-align: center;
            margin-bottom: 15px;
            padding: 15px;
            background: rgba(0,0,0,0.3);
            border-radius: 8px;
            font-size: 14px;
        }
        .loading-spinner {
            display: inline-block;
            width: 20px; height: 20px;
            border: 3px solid rgba(255,255,255,0.3);
            border-top-color: #00d4ff;
            border-radius: 50%;
            animation: spin 1s linear infinite;
            margin-right: 10px;
            vertical-align: middle;
        }
        @keyframes spin {
            to { transform: rotate(360deg); }
        }
        .progress-bar {
            width: 100%;
            height: 4px;
            background: rgba(255,255,255,0.1);
            border-radius: 2px;
            margin-top: 10px;
            overflow: hidden;
        }
        .progress-fill {
            height: 100%;
            background: linear-gradient(90deg, #00d4ff, #7b2cbf);
            animation: progress 2s ease-in-out infinite;
        }
        @keyframes progress {
            0% { width: 0%; transform: translateX(-100%); }
            50% { width: 70%; transform: translateX(50%); }
            100% { width: 0%; transform: translateX(200%); }
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>OpenAI Privacy Filter</h1>
        <p class="subtitle">PII Detection & Masking Demo using Flask</p>
        <div id="modelStatus">
            <span id="statusIndicator" class="status-indicator status-loading"></span>
            <span id="statusText">Waiting for server to start...</span>
            <div class="progress-bar" id="progressBar">
                <div class="progress-fill"></div>
            </div>
        </div>
        <div class="info-box">
            <h3>Detects 8 Types of PII:</h3>
            <ul>
                <li><strong>private_person</strong> - Names and personal identifiers</li>
                <li><strong>private_email</strong> - Email addresses</li>
                <li><strong>private_phone</strong> - Phone numbers</li>
                <li><strong>private_address</strong> - Physical addresses</li>
                <li><strong>account_number</strong> - Account/ID numbers</li>
                <li><strong>secret</strong> - Passwords, tokens, credentials</li>
                <li><strong>private_url</strong> - Personal/private URLs</li>
                <li><strong>private_date</strong> - Personal dates (birthdays, etc.)</li>
            </ul>
        </div>
        <div class="card">
            <textarea id="inputText" placeholder="Enter text with PII here...\n\nExample: My name is Alice Smith and my email is alice.smith@example.com. You can reach me at (555) 123-4567 or visit me at 123 Main Street, New York. My SSN is 123-45-6789."></textarea>
            <button onclick="analyzeText()" id="analyzeBtn" disabled>Waiting for model...</button>
            <div id="errorBox" class="error-box" style="display: none;"></div>
        </div>
        <div class="card results" id="resultsCard">
            <h3 style="margin-bottom: 15px;">Results</h3>
            <div class="result-text" id="resultDisplay"></div>
            <div class="legend">
                <div class="legend-item"><div class="legend-color entity-private_person"></div> Person</div>
                <div class="legend-item"><div class="legend-color entity-private_email"></div> Email</div>
                <div class="legend-item"><div class="legend-color entity-private_phone"></div> Phone</div>
                <div class="legend-item"><div class="legend-color entity-private_address"></div> Address</div>
                <div class="legend-item"><div class="legend-color entity-account_number"></div> Account</div>
                <div class="legend-item"><div class="legend-color entity-secret"></div> Secret</div>
                <div class="legend-item"><div class="legend-color entity-private_url"></div> URL</div>
                <div class="legend-item"><div class="legend-color entity-private_date"></div> Date</div>
            </div>
            <div class="details-list" id="detailsList"></div>
        </div>
    </div>
    <script>
        let statusCheckInterval = null;
        let isModelLoaded = false;
        let retryCount = 0;
        const maxRetries = 200; // 16 minutes of retrying (200 * 5 seconds)

        // `message` is inserted as HTML: callers must escape untrusted text first.
        function updateStatus(state, message) {
            const statusIndicator = document.getElementById("statusIndicator");
            const statusText = document.getElementById("statusText");
            const progressBar = document.getElementById("progressBar");
            const btn = document.getElementById("analyzeBtn");
            switch(state) {
                case 'connecting':
                    statusIndicator.className = "status-indicator status-waiting";
                    statusText.innerHTML = `<span class="loading-spinner"></span>${message}`;
                    btn.disabled = true;
                    btn.textContent = "Server is starting up...";
                    progressBar.style.display = "block";
                    break;
                case 'loading':
                    statusIndicator.className = "status-indicator status-loading";
                    statusText.innerHTML = `<span class="loading-spinner"></span>${message}`;
                    btn.disabled = true;
                    btn.textContent = "Model is loading...";
                    progressBar.style.display = "block";
                    break;
                case 'ready':
                    statusIndicator.className = "status-indicator status-ok";
                    statusText.innerHTML = "✓ " + message;
                    btn.disabled = false;
                    btn.textContent = "Detect PII";
                    progressBar.style.display = "none";
                    break;
                case 'error':
                    statusIndicator.className = "status-indicator status-error";
                    statusText.innerHTML = "✗ " + message;
                    btn.disabled = true;
                    btn.textContent = "Model unavailable";
                    progressBar.style.display = "none";
                    break;
            }
        }

        // Check model status on page load and keep polling
        async function checkModelStatus() {
            retryCount++;
            if (retryCount > maxRetries) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
                // Show reload button
                updateStatus('error', 'Server did not respond. <button onclick="location.reload()">Refresh Page</button>');
                return;
            }
            try {
                const response = await fetch("/health", {
                    method: "GET",
                    headers: { "Cache-Control": "no-cache" }
                });
                // /health replies 503 *with a JSON body* when the model failed to
                // load, so a 503 must be parsed rather than treated as "server down".
                if (!response.ok && response.status !== 503) {
                    throw new Error(`HTTP ${response.status}`);
                }
                const data = await response.json();
                if (!data || typeof data.model_loaded !== "boolean") {
                    // JSON that did not come from this app (e.g. a gateway error page).
                    throw new Error(`HTTP ${response.status}`);
                }
                console.log("Health check response:", data);
                if (data.model_loading) {
                    // Still loading
                    updateStatus('loading', `Model loading initialized... (5-10 minutes on first run)`);
                    if (!statusCheckInterval) {
                        statusCheckInterval = setInterval(checkModelStatus, 5000);
                    }
                    isModelLoaded = false;
                } else if (data.model_loaded) {
                    // Model ready
                    updateStatus('ready', 'Model loaded and ready');
                    if (statusCheckInterval) {
                        clearInterval(statusCheckInterval);
                        statusCheckInterval = null;
                    }
                    isModelLoaded = true;
                    retryCount = 0;
                } else {
                    // Model failed; the reason is server text, so escape it before
                    // it goes into innerHTML.
                    const reason = escapeHtml(data.error || "Unknown error");
                    updateStatus('error', `Model failed: ${reason}`);
                    const errorBox = document.getElementById("errorBox");
                    errorBox.style.display = "block";
                    errorBox.innerHTML = `<strong>Error:</strong> ${reason}`;
                    if (statusCheckInterval) {
                        clearInterval(statusCheckInterval);
                        statusCheckInterval = null;
                    }
                    isModelLoaded = false;
                }
            } catch (error) {
                console.error("Health check failed:", error);
                // Server not ready yet, show connecting state
                updateStatus('connecting', `Waiting for server to start... (attempt ${retryCount})`);
                if (!statusCheckInterval) {
                    statusCheckInterval = setInterval(checkModelStatus, 5000);
                }
            }
        }

        // Start checking immediately with connecting state
        checkModelStatus();

        async function analyzeText() {
            const text = document.getElementById("inputText").value;
            const btn = document.getElementById("analyzeBtn");
            const resultsCard = document.getElementById("resultsCard");
            const errorBox = document.getElementById("errorBox");
            // The Ctrl+Enter shortcut bypasses the disabled button, so check it here:
            // nothing to do while the model is unavailable or a request is in flight.
            if (btn.disabled) {
                return;
            }
            if (!text.trim()) {
                errorBox.style.display = "block";
                errorBox.textContent = "Please enter some text first!";
                return;
            }
            btn.disabled = true;
            btn.innerHTML = '<span class="loading-spinner"></span>Analyzing...';
            errorBox.style.display = "none";
            try {
                const response = await fetch("/analyze", {
                    method: "POST",
                    headers: { "Content-Type": "application/json" },
                    body: JSON.stringify({ text: text })
                });
                const data = await response.json();
                if (!response.ok || !data.success) {
                    throw new Error(data.error || "Server error");
                }
                displayResults(data, text);
                resultsCard.classList.add("active");
            } catch (error) {
                console.error("Error during analysis:", error);
                errorBox.style.display = "block";
                errorBox.textContent = "Error: " + error.message;
                resultsCard.classList.remove("active");
            } finally {
                if (isModelLoaded) {
                    btn.disabled = false;
                    btn.textContent = "Detect PII";
                }
            }
        }

        function displayResults(data, originalText) {
            let html = "";
            let lastEnd = 0;
            if (data.entities && data.entities.length > 0) {
                const sorted = data.entities.sort((a, b) => a.start - b.start);
                for (const entity of sorted) {
                    // The label becomes part of a class attribute: keep only
                    // characters that are safe there.
                    const cssLabel = String(entity.label).replace(/[^A-Za-z0-9_-]/g, "_");
                    html += escapeHtml(originalText.slice(lastEnd, entity.start));
                    html += `<span class="entity entity-${cssLabel}">${escapeHtml(entity.text)}</span>`;
                    lastEnd = entity.end;
                }
                html += escapeHtml(originalText.slice(lastEnd));
                const detailsHtml = sorted.map(e => `
                    <div class="detail-item">
                        <div>
                            <span class="detail-type">${escapeHtml(e.label)}</span>: ${escapeHtml(e.text)}
                        </div>
                        <div class="detail-score">Score: ${(e.score * 100).toFixed(2)}%</div>
                    </div>
                `).join("");
                document.getElementById("detailsList").innerHTML = "<h4 style='margin:20px 0 10px 0;'>Detected Entities:</h4>" + detailsHtml;
            } else {
                html = escapeHtml(originalText) + "\\n\\n[No PII detected]";
                document.getElementById("detailsList").innerHTML = "";
            }
            document.getElementById("resultDisplay").innerHTML = html;
        }

        function escapeHtml(text) {
            const div = document.createElement("div");
            div.textContent = text;
            return div.innerHTML;
        }

        // Cleanup on page unload
        window.addEventListener("beforeunload", () => {
            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
            }
        });

        // Add keyboard shortcut (Ctrl+Enter to analyze)
        document.addEventListener('DOMContentLoaded', () => {
            document.getElementById('inputText').addEventListener('keydown', function(e) {
                if (e.ctrlKey && e.key === 'Enter') {
                    analyzeText();
                }
            });
        });
    </script>
</body>
</html>
'''
@app.route('/')
def index():
    """Serve the demo page by rendering HTML_TEMPLATE as a template string."""
    page = render_template_string(HTML_TEMPLATE)
    return page
@app.route('/health')
def health():
    """Report the model state as JSON.

    Three outcomes, checked in this order:
      * pipeline available   -> status "healthy"  (default 200 response)
      * load still running   -> status "loading"  (default 200 response)
      * neither of the above -> status "unhealthy" with an ``error`` text, HTTP 503
    """
    # The module-level state is only read here, so no ``global`` statement is needed.
    if classifier is not None:
        ready = {
            'status': 'healthy',
            'model_loaded': True,
            'model_loading': False
        }
        return jsonify(ready)

    if model_loading:
        in_progress = {
            'status': 'loading',
            'model_loaded': False,
            'model_loading': True,
            'message': 'Model is still loading, please wait...'
        }
        return jsonify(in_progress)

    # No pipeline and no load in progress: report the recorded error, or a
    # generic message when none was recorded.
    failed = {
        'status': 'unhealthy',
        'model_loaded': False,
        'model_loading': False,
        'error': model_error or 'Model loading failed or thread terminated unexpectedly'
    }
    return jsonify(failed), 503
@app.route('/analyze', methods=['POST', 'OPTIONS'])
def analyze():
    """Run PII detection on the posted text and return the entities as JSON.

    Request body: a JSON object with a string field ``text``.

    Responses:
        204: empty body, for an OPTIONS request.
        503: ``success: False`` when the model pipeline is not available.
        400: ``success: False`` when the body is missing, is not valid JSON,
             is not a JSON object, or ``text`` is not a string.
        200: ``success: True`` with ``entities`` (each having ``label``,
             ``text``, ``start``, ``end``, ``score``) and ``entity_count``.
             Blank text returns an empty list without calling the model.
        500: ``success: False`` with the exception message when inference or
             serialization raises.
    """
    if request.method == 'OPTIONS':
        return '', 204

    # Read the global once so the check and the call below use the same object.
    pipe = classifier
    if pipe is None:
        return jsonify({
            'success': False,
            'error': f'Model not yet loaded. Current status: {"loading" if model_loading else "failed"}. Please wait and refresh in a few minutes.'
        }), 503

    try:
        # silent=True makes a wrong content type or malformed JSON come back as
        # None; without it get_json() raises and the broad handler below would
        # report a client mistake as a 500.
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'No JSON data received'}), 400

        text = data.get('text', '')
        if not isinstance(text, str):
            # e.g. {"text": 123}: .strip() would fail with AttributeError.
            return jsonify({'success': False, 'error': 'Field "text" must be a string'}), 400
        if not text.strip():
            return jsonify({'success': True, 'entities': [], 'entity_count': 0})

        # Run classification
        results = pipe(text)

        entities = []
        for entity in results:
            start = entity.get('start', 0)
            end = entity.get('end', 0)
            # The client rebuilds the text as "gap before start + entity text",
            # so prefer the exact source span. The pipeline's ``word`` is
            # rebuilt from tokens and presumably can differ in whitespace or
            # casing (confirm for this tokenizer); it is used only when the
            # offsets are unusable.
            if (isinstance(start, int) and isinstance(end, int)
                    and 0 <= start < end <= len(text)):
                span_text = text[start:end]
            else:
                span_text = entity.get('word', '')
            entities.append({
                'label': entity.get('entity_group', entity.get('entity', 'unknown')),
                'text': span_text,
                'start': start,
                'end': end,
                'score': float(entity.get('score', 0))
            })

        return jsonify({
            'success': True,
            'entities': entities,
            'entity_count': len(entities)
        })
    except Exception as e:
        # Top-level boundary for the request: log and answer with JSON rather
        # than letting the framework produce an HTML error page.
        print(f"Error during analysis: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
if __name__ == '__main__':
    # Script entry point: listen on every interface, on $PORT when it is set
    # and on 7860 otherwise, with the threaded server and debug mode off.
    listen_port = int(os.getenv('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port, debug=False, threaded=True)