NOT-OMEGA committed on
Commit
22aa505
·
verified ·
1 Parent(s): f17175f

Update app_gradio.py

Browse files
Files changed (1) hide show
  1. app_gradio.py +10 -28
app_gradio.py CHANGED
@@ -9,6 +9,10 @@ import pandas as pd
9
  import numpy as np # <-- Added numpy for percentiles
10
  import gradio as gr
11
  from classify import classify_log, classify_csv
 
 
 
 
12
 
13
  SOURCES = [
14
  "ModernCRM", "ModernHR", "BillingSystem",
@@ -383,8 +387,11 @@ code, pre {
383
 
384
  # ── Functions ───────────────────────────────────────────────────────────────
385
  def classify_single(source: str, log_message: str):
 
386
  if not log_message.strip():
387
  return "β€”", "β€”", "β€”", "β€”"
 
 
388
  t0 = time.perf_counter()
389
  result = classify_log(source, log_message)
390
  latency_ms = (time.perf_counter() - t0) * 1000
@@ -417,7 +424,7 @@ def classify_batch(file):
417
  tier_lines = "\n".join(f" {TIER_COLORS.get(k,'βšͺ')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
418
  label_lines = "\n".join(f" β€’ {k}: {v}" for k, v in label_counts.items())
419
 
420
- # Calculate Latencies (Requires 'latency_ms' column in CSV output from classify_csv)
421
  if "latency_ms" in df.columns and not df["latency_ms"].empty:
422
  latencies = df["latency_ms"].dropna()
423
  p50 = np.percentile(latencies, 50)
@@ -525,7 +532,7 @@ Upload a CSV with columns: **`source`**, **`log_message`** Output includes: `pre
525
  batch_btn = gr.Button("β–Ά CLASSIFY ALL", variant="primary")
526
  with gr.Column():
527
  csv_output = gr.File(label="πŸ“₯ DOWNLOAD RESULTS")
528
- # Increased lines from 12 to 16 to fit the new metrics nicely
529
  stats_out = gr.Textbox(label="πŸ“Š STATISTICS", lines=16, interactive=False)
530
 
531
  batch_btn.click(
@@ -536,29 +543,4 @@ Upload a CSV with columns: **`source`**, **`log_message`** Output includes: `pre
536
 
537
  gr.Markdown("""
538
  **Sample CSV format:**
539
- """)
540
-
541
- # ── Tab 3: Architecture ───────────────────────────────────────────
542
- with gr.Tab("πŸ—οΈ ARCHITECTURE"):
543
- gr.Markdown("""
544
- ## 3-Tier Hybrid Pipeline
545
-
546
- | Tier | Method | Coverage | Latency | Trigger |
547
- |------|--------|----------|---------|---------|
548
- | 🟒 **Regex** | Python `re` patterns | ~21% | < 1ms | Fixed patterns |
549
- | πŸ”΅ **BERT** | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories |
550
- | 🟑 **LLM** | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM + rare patterns |
551
-
552
- ## Model Performance
553
- - **Training data**: 2,410 synthetic enterprise logs
554
- - **Confidence threshold**: 0.5 (below β†’ escalate to LLM)
555
- - **Source-aware routing**: `LegacyCRM` β†’ LLM directly
556
-
557
- ## Environment Variables
558
- | Secret | Purpose |
559
- |--------|---------|
560
- | `HF_TOKEN` | LLM inference for LegacyCRM logs |
561
- """)
562
-
563
- if __name__ == "__main__":
564
- demo.launch(server_name="0.0.0.0", server_port=7860, theme=THEME, css=CUSTOM_CSS)
 
9
  import numpy as np # <-- Added numpy for percentiles
10
  import gradio as gr
11
  from classify import classify_log, classify_csv
12
+ from processor_bert import preload_models
13
+
14
+ # ── Preload models in background at startup ─────────────────
15
+ preload_models()
16
 
17
  SOURCES = [
18
  "ModernCRM", "ModernHR", "BillingSystem",
 
387
 
388
  # ── Functions ───────────────────────────────────────────────────────────────
389
  def classify_single(source: str, log_message: str):
390
+ from processor_bert import _model_ready
391
  if not log_message.strip():
392
  return "β€”", "β€”", "β€”", "β€”"
393
+ if not _model_ready:
394
+ return "⏳ Model loading...", "Please wait ~60s", "β€”", "β€”"
395
  t0 = time.perf_counter()
396
  result = classify_log(source, log_message)
397
  latency_ms = (time.perf_counter() - t0) * 1000
 
424
  tier_lines = "\n".join(f" {TIER_COLORS.get(k,'βšͺ')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
425
  label_lines = "\n".join(f" β€’ {k}: {v}" for k, v in label_counts.items())
426
 
427
+ # Calculate Latencies
428
  if "latency_ms" in df.columns and not df["latency_ms"].empty:
429
  latencies = df["latency_ms"].dropna()
430
  p50 = np.percentile(latencies, 50)
 
532
  batch_btn = gr.Button("β–Ά CLASSIFY ALL", variant="primary")
533
  with gr.Column():
534
  csv_output = gr.File(label="πŸ“₯ DOWNLOAD RESULTS")
535
+ # Increased lines to 16 to properly fit the latency metrics
536
  stats_out = gr.Textbox(label="πŸ“Š STATISTICS", lines=16, interactive=False)
537
 
538
  batch_btn.click(
 
543
 
544
  gr.Markdown("""
545
  **Sample CSV format:**
546
+ """)