Spaces:
Sleeping
Sleeping
Update app_gradio.py
Browse files- app_gradio.py +10 -28
app_gradio.py
CHANGED
|
@@ -9,6 +9,10 @@ import pandas as pd
|
|
| 9 |
import numpy as np # <-- Added numpy for percentiles
|
| 10 |
import gradio as gr
|
| 11 |
from classify import classify_log, classify_csv
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
SOURCES = [
|
| 14 |
"ModernCRM", "ModernHR", "BillingSystem",
|
|
@@ -383,8 +387,11 @@ code, pre {
|
|
| 383 |
|
| 384 |
# ββ Functions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 385 |
def classify_single(source: str, log_message: str):
|
|
|
|
| 386 |
if not log_message.strip():
|
| 387 |
return "β", "β", "β", "β"
|
|
|
|
|
|
|
| 388 |
t0 = time.perf_counter()
|
| 389 |
result = classify_log(source, log_message)
|
| 390 |
latency_ms = (time.perf_counter() - t0) * 1000
|
|
@@ -417,7 +424,7 @@ def classify_batch(file):
|
|
| 417 |
tier_lines = "\n".join(f" {TIER_COLORS.get(k,'βͺ')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
|
| 418 |
label_lines = "\n".join(f" β’ {k}: {v}" for k, v in label_counts.items())
|
| 419 |
|
| 420 |
-
# Calculate Latencies
|
| 421 |
if "latency_ms" in df.columns and not df["latency_ms"].empty:
|
| 422 |
latencies = df["latency_ms"].dropna()
|
| 423 |
p50 = np.percentile(latencies, 50)
|
|
@@ -525,7 +532,7 @@ Upload a CSV with columns: **`source`**, **`log_message`** Output includes: `pre
|
|
| 525 |
batch_btn = gr.Button("βΆ CLASSIFY ALL", variant="primary")
|
| 526 |
with gr.Column():
|
| 527 |
csv_output = gr.File(label="π₯ DOWNLOAD RESULTS")
|
| 528 |
-
# Increased lines
|
| 529 |
stats_out = gr.Textbox(label="π STATISTICS", lines=16, interactive=False)
|
| 530 |
|
| 531 |
batch_btn.click(
|
|
@@ -536,29 +543,4 @@ Upload a CSV with columns: **`source`**, **`log_message`** Output includes: `pre
|
|
| 536 |
|
| 537 |
gr.Markdown("""
|
| 538 |
**Sample CSV format:**
|
| 539 |
-
""")
|
| 540 |
-
|
| 541 |
-
# ββ Tab 3: Architecture βββββββββββββββββββββββββββββββββββββββββββ
|
| 542 |
-
with gr.Tab("ποΈ ARCHITECTURE"):
|
| 543 |
-
gr.Markdown("""
|
| 544 |
-
## 3-Tier Hybrid Pipeline
|
| 545 |
-
|
| 546 |
-
| Tier | Method | Coverage | Latency | Trigger |
|
| 547 |
-
|------|--------|----------|---------|---------|
|
| 548 |
-
| 🟢 **Regex** | Python `re` patterns | ~21% | < 1ms | Fixed patterns |
|
| 549 |
-
| 🔵 **BERT** | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories |
|
| 550 |
-
| 🟡 **LLM** | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM + rare patterns |
|
| 551 |
-
|
| 552 |
-
## Model Performance
|
| 553 |
-
- **Training data**: 2,410 synthetic enterprise logs
|
| 554 |
-
- **Confidence threshold**: 0.5 (below → escalate to LLM)
|
| 555 |
-
- **Source-aware routing**: `LegacyCRM` → LLM directly
|
| 556 |
-
|
| 557 |
-
## Environment Variables
|
| 558 |
-
| Secret | Purpose |
|
| 559 |
-
|--------|---------|
|
| 560 |
-
| `HF_TOKEN` | LLM inference for LegacyCRM logs |
|
| 561 |
-
""")
|
| 562 |
-
|
| 563 |
-
if __name__ == "__main__":
|
| 564 |
-
demo.launch(server_name="0.0.0.0", server_port=7860, theme=THEME, css=CUSTOM_CSS)
|
|
|
|
| 9 |
import numpy as np # <-- Added numpy for percentiles
|
| 10 |
import gradio as gr
|
| 11 |
from classify import classify_log, classify_csv
|
| 12 |
+
from processor_bert import preload_models
|
| 13 |
+
|
| 14 |
+
# ββ Preload models in background at startup βββββββββββββββββ
|
| 15 |
+
preload_models()
|
| 16 |
|
| 17 |
SOURCES = [
|
| 18 |
"ModernCRM", "ModernHR", "BillingSystem",
|
|
|
|
| 387 |
|
| 388 |
# ββ Functions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 389 |
def classify_single(source: str, log_message: str):
|
| 390 |
+
from processor_bert import _model_ready
|
| 391 |
if not log_message.strip():
|
| 392 |
return "β", "β", "β", "β"
|
| 393 |
+
if not _model_ready:
|
| 394 |
+
return "β³ Model loading...", "Please wait ~60s", "β", "β"
|
| 395 |
t0 = time.perf_counter()
|
| 396 |
result = classify_log(source, log_message)
|
| 397 |
latency_ms = (time.perf_counter() - t0) * 1000
|
|
|
|
| 424 |
tier_lines = "\n".join(f" {TIER_COLORS.get(k,'βͺ')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
|
| 425 |
label_lines = "\n".join(f" β’ {k}: {v}" for k, v in label_counts.items())
|
| 426 |
|
| 427 |
+
# Calculate Latencies
|
| 428 |
if "latency_ms" in df.columns and not df["latency_ms"].empty:
|
| 429 |
latencies = df["latency_ms"].dropna()
|
| 430 |
p50 = np.percentile(latencies, 50)
|
|
|
|
| 532 |
batch_btn = gr.Button("βΆ CLASSIFY ALL", variant="primary")
|
| 533 |
with gr.Column():
|
| 534 |
csv_output = gr.File(label="π₯ DOWNLOAD RESULTS")
|
| 535 |
+
# Increased lines to 16 to properly fit the latency metrics
|
| 536 |
stats_out = gr.Textbox(label="π STATISTICS", lines=16, interactive=False)
|
| 537 |
|
| 538 |
batch_btn.click(
|
|
|
|
| 543 |
|
| 544 |
gr.Markdown("""
|
| 545 |
**Sample CSV format:**
|
| 546 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|