broadfield-dev committed on
Commit
caa2485
·
verified ·
1 Parent(s): 9ad819a

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. app.py +588 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies (separate layer so code changes do not
# invalidate the dependency cache)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# app.py downloads the model into /app/.cache/huggingface at runtime, so that
# directory must be writable by the runtime user. Hosting platforms that run
# the container as UID 1000 (e.g. Hugging Face Spaces) would otherwise fail
# with a permission error on the root-owned /app.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app/.cache/huggingface \
    && chown -R user:user /app
USER user

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template_string, request, jsonify
2
+ from flask_cors import CORS
3
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
4
+ import os
5
+ import sys
6
+ import threading
7
+ import time
8
+
9
# Flask application object; flask-cors is applied with its default settings.
app = Flask(__name__)
CORS(app)

# Model loading state.
# These are plain module-level globals written by load_model_async() and read
# by the request handlers; no lock is used anywhere in this file.
model_name = "openai/privacy-filter"
classifier = None       # the token-classification pipeline once loading succeeds
model_loading = False   # True while load_model_async() is running
model_error = None      # str(exception) of the last failed load, else None
model_thread = None     # background loader thread, assigned further below
18
+
19
+ # Background model loading
20
def load_model_async():
    """Load tokenizer and model, then publish the pipeline in ``classifier``.

    Used as the target of the background loader thread. While it runs,
    ``model_loading`` is True. On success the pipeline is stored in the
    module-level ``classifier`` and ``model_error`` is cleared. On any
    ``Exception`` the message is stored in ``model_error`` and the traceback
    is printed; the exception is not re-raised. ``model_loading`` is reset to
    False in every case.
    """
    global classifier, model_loading, model_error

    model_loading = True
    hf_cache = "/app/.cache/huggingface"
    banner = "="*60

    print(banner, flush=True)
    print("BACKGROUND: Loading OpenAI Privacy Filter model...", flush=True)
    print(banner, flush=True)

    try:
        print(f"Loading tokenizer and model: {model_name}", flush=True)
        print("This may take 5-10 minutes on first run...", flush=True)

        # Tokenizer and model are loaded explicitly and handed to pipeline()
        # rather than letting pipeline() resolve them from the model name.
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=hf_cache)
        model = AutoModelForTokenClassification.from_pretrained(
            model_name, cache_dir=hf_cache
        )

        pipeline_options = {
            "task": "token-classification",
            "model": model,
            "tokenizer": tokenizer,
            "aggregation_strategy": "simple",
            "device": -1,  # Force CPU
        }
        classifier = pipeline(**pipeline_options)

        print("✓ Model loaded successfully!", flush=True)
        model_error = None
    except Exception as e:
        # Keep the message so /health can report why loading failed.
        model_error = str(e)
        print(f"✗ ERROR loading model: {e}", flush=True)
        import traceback
        traceback.print_exc()
    finally:
        model_loading = False
60
+
61
# Start model loading in the background.
# The flag is raised here, before the thread starts, so /health can never
# observe "classifier is None and not loading" (which it reports as a failed
# load with HTTP 503) in the gap before load_model_async() sets it itself.
model_loading = True
model_thread = threading.Thread(target=load_model_async, daemon=True)
model_thread.start()
64
+
65
+ # HTML Template with proper loading states
66
+ HTML_TEMPLATE = '''
67
+ <!DOCTYPE html>
68
+ <html lang="en">
69
+ <head>
70
+ <meta charset="UTF-8">
71
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
72
+ <title>OpenAI Privacy Filter - PII Detection Demo</title>
73
+ <style>
74
+ * { box-sizing: border-box; margin: 0; padding: 0; }
75
+ body {
76
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
77
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
78
+ min-height: 100vh;
79
+ color: #fff;
80
+ padding: 20px;
81
+ }
82
+ .container { max-width: 900px; margin: 0 auto; }
83
+ h1 {
84
+ text-align: center; margin-bottom: 10px;
85
+ background: linear-gradient(90deg, #00d4ff, #7b2cbf);
86
+ -webkit-background-clip: text;
87
+ -webkit-text-fill-color: transparent;
88
+ font-size: 2.5rem;
89
+ }
90
+ .subtitle { text-align: center; color: #8892b0; margin-bottom: 30px; }
91
+ .card {
92
+ background: rgba(255,255,255,0.05);
93
+ border-radius: 12px;
94
+ padding: 25px;
95
+ margin-bottom: 20px;
96
+ backdrop-filter: blur(10px);
97
+ border: 1px solid rgba(255,255,255,0.1);
98
+ }
99
+ textarea {
100
+ width: 100%; min-height: 150px; padding: 15px;
101
+ border-radius: 8px; border: 1px solid rgba(255,255,255,0.2);
102
+ background: rgba(0,0,0,0.3); color: #fff;
103
+ font-size: 14px; resize: vertical; font-family: monospace;
104
+ }
105
+ textarea::placeholder { color: #666; }
106
+ button {
107
+ width: 100%; padding: 15px; margin-top: 15px;
108
+ border: none; border-radius: 8px;
109
+ background: linear-gradient(90deg, #00d4ff, #7b2cbf);
110
+ color: #fff; font-size: 16px; font-weight: 600;
111
+ cursor: pointer; transition: transform 0.2s, box-shadow 0.2s;
112
+ }
113
+ button:hover:not(:disabled) {
114
+ transform: translateY(-2px);
115
+ box-shadow: 0 5px 25px rgba(0,212,255,0.4);
116
+ }
117
+ button:disabled {
118
+ opacity: 0.6; cursor: not-allowed;
119
+ background: linear-gradient(90deg, #666, #444);
120
+ }
121
+ .results { display: none; }
122
+ .results.active { display: block; }
123
+ .result-text {
124
+ background: rgba(0,0,0,0.3); padding: 20px;
125
+ border-radius: 8px; font-family: monospace;
126
+ line-height: 1.8; word-wrap: break-word;
127
+ white-space: pre-wrap;
128
+ }
129
+ .entity {
130
+ padding: 2px 8px; border-radius: 4px;
131
+ font-weight: bold;
132
+ }
133
+ .entity-private_person { background: rgba(255,107,107,0.3); border: 1px solid #ff6b6b; }
134
+ .entity-private_email { background: rgba(78,205,196,0.3); border: 1px solid #4ecdc4; }
135
+ .entity-private_phone { background: rgba(255,209,102,0.3); border: 1px solid #ffd166; }
136
+ .entity-private_address { background: rgba(6,214,160,0.3); border: 1px solid #06d6a0; }
137
+ .entity-account_number { background: rgba(239,71,111,0.3); border: 1px solid #ef476f; }
138
+ .entity-secret { background: rgba(255,0,110,0.3); border: 1px solid #ff006e; }
139
+ .entity-private_url { background: rgba(131,56,236,0.3); border: 1px solid #8338ec; }
140
+ .entity-private_date { background: rgba(58,134,255,0.3); border: 1px solid #3a86ff; }
141
+ .legend {
142
+ display: flex; flex-wrap: wrap; gap: 10px;
143
+ margin-top: 15px; justify-content: center;
144
+ }
145
+ .legend-item {
146
+ display: flex; align-items: center;
147
+ gap: 5px; font-size: 12px;
148
+ }
149
+ .legend-color {
150
+ width: 20px; height: 20px;
151
+ border-radius: 4px; border: 1px solid;
152
+ }
153
+ .details-list { margin-top: 20px; }
154
+ .detail-item {
155
+ display: flex; justify-content: space-between;
156
+ align-items: center; padding: 12px;
157
+ background: rgba(255,255,255,0.03);
158
+ border-radius: 6px; margin-bottom: 8px;
159
+ }
160
+ .detail-type { font-weight: bold; color: #00d4ff; }
161
+ .detail-score { font-size: 12px; color: #8892b0; }
162
+ .error-box {
163
+ background: rgba(239,71,111,0.2);
164
+ border: 1px solid #ef476f;
165
+ padding: 15px;
166
+ border-radius: 8px;
167
+ margin-top: 15px;
168
+ color: #ff6b6b;
169
+ }
170
+ .info-box {
171
+ background: rgba(0,212,255,0.1);
172
+ border-left: 3px solid #00d4ff;
173
+ padding: 15px; margin-bottom: 20px;
174
+ border-radius: 0 8px 8px 0;
175
+ }
176
+ .info-box h3 { margin-bottom: 5px; }
177
+ .info-box ul { margin-left: 20px; color: #8892b0; }
178
+ .status-indicator {
179
+ display: inline-block;
180
+ width: 10px; height: 10px;
181
+ border-radius: 50%;
182
+ margin-right: 8px;
183
+ }
184
+ .status-ok { background: #06d6a0; }
185
+ .status-error { background: #ef476f; }
186
+ .status-loading { background: #ffd166; animation: pulse 1s infinite; }
187
+ .status-waiting { background: #3a86ff; }
188
+ @keyframes pulse {
189
+ 0%, 100% { opacity: 1; }
190
+ 50% { opacity: 0.3; }
191
+ }
192
+ #modelStatus {
193
+ text-align: center;
194
+ margin-bottom: 15px;
195
+ padding: 15px;
196
+ background: rgba(0,0,0,0.3);
197
+ border-radius: 8px;
198
+ font-size: 14px;
199
+ }
200
+ .loading-spinner {
201
+ display: inline-block;
202
+ width: 20px; height: 20px;
203
+ border: 3px solid rgba(255,255,255,0.3);
204
+ border-top-color: #00d4ff;
205
+ border-radius: 50%;
206
+ animation: spin 1s linear infinite;
207
+ margin-right: 10px;
208
+ vertical-align: middle;
209
+ }
210
+ @keyframes spin {
211
+ to { transform: rotate(360deg); }
212
+ }
213
+ .progress-bar {
214
+ width: 100%;
215
+ height: 4px;
216
+ background: rgba(255,255,255,0.1);
217
+ border-radius: 2px;
218
+ margin-top: 10px;
219
+ overflow: hidden;
220
+ }
221
+ .progress-fill {
222
+ height: 100%;
223
+ background: linear-gradient(90deg, #00d4ff, #7b2cbf);
224
+ animation: progress 2s ease-in-out infinite;
225
+ }
226
+ @keyframes progress {
227
+ 0% { width: 0%; transform: translateX(-100%); }
228
+ 50% { width: 70%; transform: translateX(50%); }
229
+ 100% { width: 0%; transform: translateX(200%); }
230
+ }
231
+ </style>
232
+ </head>
233
+ <body>
234
+ <div class="container">
235
+ <h1>OpenAI Privacy Filter</h1>
236
+ <p class="subtitle">PII Detection & Masking Demo using Flask</p>
237
+
238
+ <div id="modelStatus">
239
+ <span id="statusIndicator" class="status-indicator status-loading"></span>
240
+ <span id="statusText">Waiting for server to start...</span>
241
+ <div class="progress-bar" id="progressBar">
242
+ <div class="progress-fill"></div>
243
+ </div>
244
+ </div>
245
+
246
+ <div class="info-box">
247
+ <h3>Detects 8 Types of PII:</h3>
248
+ <ul>
249
+ <li><strong>private_person</strong> - Names and personal identifiers</li>
250
+ <li><strong>private_email</strong> - Email addresses</li>
251
+ <li><strong>private_phone</strong> - Phone numbers</li>
252
+ <li><strong>private_address</strong> - Physical addresses</li>
253
+ <li><strong>account_number</strong> - Account/ID numbers</li>
254
+ <li><strong>secret</strong> - Passwords, tokens, credentials</li>
255
+ <li><strong>private_url</strong> - Personal/private URLs</li>
256
+ <li><strong>private_date</strong> - Personal dates (birthdays, etc.)</li>
257
+ </ul>
258
+ </div>
259
+
260
+ <div class="card">
261
+ <textarea id="inputText" placeholder="Enter text with PII here...\n\nExample: My name is Alice Smith and my email is alice.smith@example.com. You can reach me at (555) 123-4567 or visit me at 123 Main Street, New York. My SSN is 123-45-6789."></textarea>
262
+ <button onclick="analyzeText()" id="analyzeBtn" disabled>Waiting for model...</button>
263
+ <div id="errorBox" class="error-box" style="display: none;"></div>
264
+ </div>
265
+
266
+ <div class="card results" id="resultsCard">
267
+ <h3 style="margin-bottom: 15px;">Results</h3>
268
+ <div class="result-text" id="resultDisplay"></div>
269
+
270
+ <div class="legend">
271
+ <div class="legend-item"><div class="legend-color entity-private_person"></div> Person</div>
272
+ <div class="legend-item"><div class="legend-color entity-private_email"></div> Email</div>
273
+ <div class="legend-item"><div class="legend-color entity-private_phone"></div> Phone</div>
274
+ <div class="legend-item"><div class="legend-color entity-private_address"></div> Address</div>
275
+ <div class="legend-item"><div class="legend-color entity-account_number"></div> Account</div>
276
+ <div class="legend-item"><div class="legend-color entity-secret"></div> Secret</div>
277
+ <div class="legend-item"><div class="legend-color entity-private_url"></div> URL</div>
278
+ <div class="legend-item"><div class="legend-color entity-private_date"></div> Date</div>
279
+ </div>
280
+
281
+ <div class="details-list" id="detailsList"></div>
282
+ </div>
283
+ </div>
284
+
285
+ <script>
286
// Polling state shared by checkModelStatus() and analyzeText().
let statusCheckInterval = null; // setInterval handle while /health is being polled, otherwise null
let isModelLoaded = false;      // last model state reported by /health
let retryCount = 0;             // polls made so far; reset once the model is ready
const maxRetries = 200;         // 200 polls x 5 s = 1000 s (about 16.7 minutes)
290
+
291
// Render one model state ('connecting', 'loading', 'ready' or 'error') into
// the status line, the analyze button and the progress bar. `message` is
// inserted as HTML. Any other state value leaves the UI untouched.
function updateStatus(state, message) {
    const spinner = '<span class="loading-spinner"></span>';
    const views = {
        connecting: {
            dot: "status-indicator status-waiting",
            html: spinner + message,
            disabled: true,
            label: "Server is starting up...",
            bar: "block"
        },
        loading: {
            dot: "status-indicator status-loading",
            html: spinner + message,
            disabled: true,
            label: "Model is loading...",
            bar: "block"
        },
        ready: {
            dot: "status-indicator status-ok",
            html: "✓ " + message,
            disabled: false,
            label: "Detect PII",
            bar: "none"
        },
        error: {
            dot: "status-indicator status-error",
            html: "✗ " + message,
            disabled: true,
            label: "Model unavailable",
            bar: "none"
        }
    };

    if (!Object.prototype.hasOwnProperty.call(views, state)) {
        return;
    }
    const view = views[state];
    const analyzeButton = document.getElementById("analyzeBtn");

    document.getElementById("statusIndicator").className = view.dot;
    document.getElementById("statusText").innerHTML = view.html;
    analyzeButton.disabled = view.disabled;
    analyzeButton.textContent = view.label;
    document.getElementById("progressBar").style.display = view.bar;
}
328
+
329
// Poll /health once and reflect the reported model state in the UI.
// Called on page load and then every 5 seconds (via setInterval) for as long
// as the server is unreachable or the model is still loading. Polling stops
// when the model is ready, when loading failed, or after maxRetries polls.
async function checkModelStatus() {
    retryCount++;

    if (retryCount > maxRetries) {
        clearInterval(statusCheckInterval);
        statusCheckInterval = null;
        // Offer a reload button once polling has been given up.
        updateStatus('error', 'Server did not respond. <button onclick="location.reload()">Refresh Page</button>');
        return;
    }

    try {
        const response = await fetch("/health", {
            method: "GET",
            headers: { "Cache-Control": "no-cache" }
        });

        // /health answers a failed load with HTTP 503 *and* a JSON body that
        // carries the reason, so the body is read before the status code is
        // judged. Only a response without a usable status payload counts as
        // "server not ready".
        let data = null;
        try {
            data = await response.json();
        } catch (parseError) {
            data = null;
        }
        if (!data || typeof data.model_loaded !== "boolean") {
            throw new Error(`HTTP ${response.status}`);
        }
        console.log("Health check response:", data);

        if (data.model_loading) {
            // Still loading
            updateStatus('loading', `Model loading initialized... (5-10 minutes on first run)`);

            if (!statusCheckInterval) {
                statusCheckInterval = setInterval(checkModelStatus, 5000);
            }
            isModelLoaded = false;
        } else if (data.model_loaded) {
            // Model ready
            updateStatus('ready', 'Model loaded and ready');

            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
            }
            isModelLoaded = true;
            retryCount = 0;
        } else {
            // Model failed. The reason comes from the server and ends up in
            // innerHTML, so it is escaped first.
            const reason = escapeHtml(data.error || "Unknown error");
            updateStatus('error', `Model failed: ${reason}`);

            const errorBox = document.getElementById("errorBox");
            errorBox.style.display = "block";
            errorBox.innerHTML = `<strong>Error:</strong> ${reason}`;

            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
            }
            isModelLoaded = false;
        }
    } catch (error) {
        console.error("Health check failed:", error);
        // Server not ready yet, show connecting state
        updateStatus('connecting', `Waiting for server to start... (attempt ${retryCount})`);

        if (!statusCheckInterval) {
            statusCheckInterval = setInterval(checkModelStatus, 5000);
        }
    }
}

// Start checking immediately with connecting state
checkModelStatus();
400
+
401
// Send the textarea content to /analyze and render the detected entities.
// Errors (empty input, HTTP failure, server-reported failure) are shown in
// the error box and hide the results card.
async function analyzeText() {
    const btn = document.getElementById("analyzeBtn");

    // The Ctrl+Enter shortcut calls this function directly and so bypasses
    // the disabled button. A disabled button means the model is not ready or
    // an analysis is already in flight; in both cases do nothing.
    if (btn.disabled) {
        return;
    }

    const text = document.getElementById("inputText").value;
    const resultsCard = document.getElementById("resultsCard");
    const errorBox = document.getElementById("errorBox");

    if (!text.trim()) {
        errorBox.style.display = "block";
        errorBox.textContent = "Please enter some text first!";
        return;
    }

    btn.disabled = true;
    btn.innerHTML = '<span class="loading-spinner"></span>Analyzing...';
    errorBox.style.display = "none";

    try {
        const response = await fetch("/analyze", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ text: text })
        });

        // A non-JSON error body (e.g. from a proxy) should surface as an
        // HTTP error, not as a JSON parser message.
        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            throw new Error(`Server returned HTTP ${response.status}`);
        }

        if (!response.ok || !data.success) {
            throw new Error(data.error || "Server error");
        }

        displayResults(data, text);
        resultsCard.classList.add("active");

    } catch (error) {
        console.error("Error during analysis:", error);
        errorBox.style.display = "block";
        errorBox.textContent = "Error: " + error.message;
        resultsCard.classList.remove("active");
    } finally {
        // Only re-enable when the model is still reported as loaded;
        // otherwise the status poller owns the button state.
        if (isModelLoaded) {
            btn.disabled = false;
            btn.textContent = "Detect PII";
        }
    }
}
445
+
446
// Render the /analyze response: the original text with every detected entity
// wrapped in a coloured span, followed by a list of entities with scores.
//   data         - parsed /analyze response; data.entities is a list of
//                  objects with label, text, start, end and score
//   originalText - the exact string that was sent for analysis
function displayResults(data, originalText) {
    // start/end are computed by the Python backend. Python string indices
    // count code points, whereas JS string indices count UTF-16 units, so the
    // text is split into code points before slicing. Otherwise every
    // highlight after an emoji (or other astral character) would be shifted.
    // NOTE(review): assumes the pipeline reports code-point offsets - confirm.
    const chars = Array.from(originalText);
    const sliceText = (from, to) => chars.slice(from, to).join("");

    // The label becomes part of a class attribute; keep only characters that
    // are safe there (the known labels use letters and underscores).
    const classSafe = (label) => String(label).replace(/[^A-Za-z0-9_-]/g, "");

    const detailsList = document.getElementById("detailsList");
    // Copy before sorting so the response object is not mutated.
    const entities = Array.isArray(data.entities) ? data.entities.slice() : [];
    let html = "";

    if (entities.length > 0) {
        entities.sort((a, b) => a.start - b.start);

        let cursor = 0;
        for (const entity of entities) {
            if (entity.start < cursor) {
                // Overlaps the previous span; emitting it would duplicate text.
                continue;
            }
            // Prefer the exact input slice; fall back to the server-provided
            // text when the offsets yield nothing.
            const spanText = sliceText(entity.start, entity.end) || entity.text;
            html += escapeHtml(sliceText(cursor, entity.start));
            html += `<span class="entity entity-${classSafe(entity.label)}">${escapeHtml(spanText)}</span>`;
            cursor = entity.end;
        }
        html += escapeHtml(sliceText(cursor));

        const detailsHtml = entities.map(e => `
            <div class="detail-item">
                <div>
                    <span class="detail-type">${escapeHtml(e.label)}</span>: ${escapeHtml(e.text)}
                </div>
                <div class="detail-score">Score: ${(e.score * 100).toFixed(2)}%</div>
            </div>
        `).join("");
        detailsList.innerHTML = "<h4 style='margin:20px 0 10px 0;'>Detected Entities:</h4>" + detailsHtml;
    } else {
        // The doubled backslashes are needed because this script lives inside
        // a Python string literal; the browser receives a normal \n escape.
        html = escapeHtml(originalText) + "\\n\\n[No PII detected]";
        detailsList.innerHTML = "";
    }

    document.getElementById("resultDisplay").innerHTML = html;
}
476
+
477
// Return `text` with HTML markup characters escaped, by letting the browser
// serialise it: the value is assigned as textContent of a detached <div> and
// the div's innerHTML is read back.
function escapeHtml(text) {
    const scratch = Object.assign(document.createElement("div"), { textContent: text });
    return scratch.innerHTML;
}
482
+
483
// Stop the /health poller when the page is being unloaded.
window.addEventListener("beforeunload", () => {
    if (!statusCheckInterval) {
        return;
    }
    clearInterval(statusCheckInterval);
});

// Keyboard shortcut: Ctrl+Enter inside the textarea starts the analysis.
document.addEventListener('DOMContentLoaded', () => {
    const inputArea = document.getElementById('inputText');
    inputArea.addEventListener('keydown', (e) => {
        const isShortcut = e.ctrlKey && e.key === 'Enter';
        if (isShortcut) {
            analyzeText();
        }
    });
});
498
+ </script>
499
+ </body>
500
+ </html>
501
+ '''
502
+
503
@app.route('/')
def index():
    """Serve the single-page demo UI rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page
506
+
507
@app.route('/health')
def health():
    """Report whether the model is loaded, still loading, or failed.

    Returns HTTP 200 with ``model_loaded``/``model_loading`` flags while the
    model is ready or loading, and HTTP 503 with an ``error`` message when
    neither is the case.
    """
    # Ready: the pipeline has been published by the loader.
    if classifier is not None:
        ready = {
            'status': 'healthy',
            'model_loaded': True,
            'model_loading': False
        }
        return jsonify(ready)

    # Loader is still running.
    if model_loading:
        pending = {
            'status': 'loading',
            'model_loaded': False,
            'model_loading': True,
            'message': 'Model is still loading, please wait...'
        }
        return jsonify(pending)

    # Neither loaded nor loading: the load failed or the thread died.
    failure_reason = model_error or 'Model loading failed or thread terminated unexpectedly'
    failed = {
        'status': 'unhealthy',
        'model_loaded': False,
        'model_loading': False,
        'error': failure_reason
    }
    return jsonify(failed), 503
533
+
534
@app.route('/analyze', methods=['POST', 'OPTIONS'])
def analyze():
    """Run PII detection on the posted text and return the detected entities.

    Request body: a JSON object with a string field ``text``.

    Responses (all JSON except the OPTIONS reply):
      * 204 - empty reply to an OPTIONS request
      * 200 - ``{"success": True, "entities": [...], "entity_count": n}``;
        each entity has ``label``, ``text``, ``start``, ``end`` and ``score``.
        Blank ``text`` yields an empty entity list.
      * 400 - missing/invalid JSON, a non-object body, or a non-string ``text``
      * 503 - the model is not loaded (still loading or failed)
      * 500 - the classifier raised; ``error`` carries the exception message
    """
    if request.method == 'OPTIONS':
        return '', 204

    if classifier is None:
        return jsonify({
            'success': False,
            'error': f'Model not yet loaded. Current status: {"loading" if model_loading else "failed"}. Please wait and refresh in a few minutes.'
        }), 503

    # silent=True turns a malformed body or wrong content type into None, so
    # client mistakes are answered with 400 below instead of being raised and
    # reported as a server error.
    data = request.get_json(silent=True)

    if not data:
        return jsonify({'success': False, 'error': 'No JSON data received'}), 400
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'JSON body must be an object'}), 400

    text = data.get('text', '')
    if not isinstance(text, str):
        return jsonify({'success': False, 'error': "'text' must be a string"}), 400

    if not text.strip():
        return jsonify({'success': True, 'entities': [], 'entity_count': 0})

    try:
        # Run classification
        results = classifier(text)

        entities = []
        for entity in results:
            start = entity.get('start', 0)
            end = entity.get('end', 0)
            # The pipeline's 'word' is decoded from tokens and need not match
            # the input exactly; the offsets give the exact source substring.
            if isinstance(start, int) and isinstance(end, int) and end > start:
                span_text = text[start:end]
            else:
                span_text = entity.get('word', '')
            entities.append({
                'label': entity.get('entity_group', entity.get('entity', 'unknown')),
                'text': span_text,
                'start': start,
                'end': end,
                'score': float(entity.get('score', 0))
            })

        return jsonify({
            'success': True,
            'entities': entities,
            'entity_count': len(entities)
        })

    except Exception as e:
        # Top-level boundary for model failures: log and report as 500.
        print(f"Error during analysis: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
585
+
586
if __name__ == '__main__':
    # Listen on $PORT when it is set, otherwise on 7860; bind all interfaces.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(
        host='0.0.0.0',
        port=listen_port,
        debug=False,
        threaded=True,
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ flask>=2.0.0
2
+ flask-cors>=4.0.0
3
+ torch>=2.0.0
4
+ transformers>=4.30.0
5
+ sentencepiece>=0.1.99
6
+ protobuf>=3.20.0