Spaces:

NOT-OMEGA
/

LogAI-Engine

Running

App Files Files Community

NOT-OMEGA committed on Apr 16

Commit

9ca9aea

verified ·

1 Parent(s): 1a9b340

Update app_gradio.py

Browse files

Files changed (1) hide show

app_gradio.py +61 -40

app_gradio.py CHANGED Viewed

@@ -1,6 +1,11 @@
 """
 Log Classification System — HuggingFace Spaces
 Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier
 """
 from __future__ import annotations
 import io
@@ -21,11 +26,11 @@ SOURCES = [
 ]
 def get_tier_icon(tier_name: str) -> str:
-    if "Regex" in tier_name: return "🟢"
-    if "BERT" in tier_name: return "🔵"
-    if "Cache Hit" in tier_name: return "⚡"
-    if "fallback" in tier_name: return "🟠"
-    if "LLM" in tier_name: return "🟡"
     return "⚪"
 EXAMPLE_LOGS = [
@@ -88,75 +93,91 @@ def classify_single(source: str, log_message: str):
         return "—", "—", "—", "—"
     if not _model_ready:
         return "⏳ Loading...", "Warming up", "—", "—"
     t0 = time.perf_counter()
     try:
         result = classify_log(source, log_message)
         latency = (time.perf_counter() - t0) * 1000
         icon = get_tier_icon(result["tier"])
         return (
-            result["label"],
-            f"{icon} {result['tier']}",
-            f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
-            f"{latency:.4f} ms"
         )
     except Exception as e:
         return f"Error: {str(e)}", "Fail", "—", "—"
 def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
-    if file is None: return None, "⚠️ Please upload a CSV file."
     progress(0, desc="🚀 Initializing Engine...")
     t0 = time.perf_counter()
     try:
-        # FIX: Generate a unique output path per user to prevent data bleeding
         unique_id = uuid.uuid4().hex
         safe_output_path = f"/tmp/classified_output_{unique_id}.csv"
         output_path, df = classify_csv(file.name, safe_output_path)
         total_time_sec = time.perf_counter() - t0
         progress(0.9, desc="📊 Calculating Metrics...")
-        total = len(df)
         label_counts = df["predicted_label"].value_counts().to_dict()
-        tier_counts = df["tier_used"].value_counts().to_dict()
         tier_lines = []
         for tier, count in tier_counts.items():
             tier_df = df[df["tier_used"] == tier]
-            lats = tier_df["latency_ms"].dropna()
-            icon = get_tier_icon(tier)
-            pct = count / total
             if "BERT" in tier:
-                total_ms = lats.sum()
-                tier_lines.append(f"  {icon} {tier}: Batch Latency {total_ms:.1f} ms (Over {count} logs)")
             elif "Regex" in tier:
                 p50 = np.percentile(lats, 50) if not lats.empty else 0
-                tier_lines.append(f"  {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})")
             else:
                 p50 = np.percentile(lats, 50) if not lats.empty else 0
                 p95 = np.percentile(lats, 95) if not lats.empty else 0
                 p99 = np.percentile(lats, 99) if not lats.empty else 0
-                tier_lines.append(f"  {icon} {tier}: {count} logs ({pct:.0%}) | p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms")
         tier_lines_str = "\n".join(tier_lines)
-        label_lines = "\n".join([f"  • {k}: {v}" for k, v in label_counts.items()])
         stats = (
             f"✅ Classified {total} logs in {total_time_sec:.2f} s\n\n"
             f"📊 Performance by Tier:\n{tier_lines_str}\n\n"
             f"🏷️ Label distribution:\n{label_lines}"
         )
         progress(1.0, desc="✅ Success")
         return output_path, stats
     except Exception as e:
         return None, f"❌ System Error: {str(e)}"
 # ── Theme & Layout ──────────────────────────────────────────
 THEME = gr.themes.Base(
     primary_hue="blue",
@@ -175,14 +196,14 @@ with gr.Blocks(title="Log AI Engine") as demo:
                     src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
                 with gr.Column(scale=3):
                     msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)
             run_btn = gr.Button("▶ CLASSIFY LOG", variant="primary")
             with gr.Row():
-                lbl_out = gr.Textbox(label="PREDICTED LABEL")
                 tier_out = gr.Textbox(label="TIER USED")
                 conf_out = gr.Textbox(label="CONFIDENCE")
-                lat_out = gr.Textbox(label="LATENCY")
             run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
             gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])
@@ -190,12 +211,12 @@ with gr.Blocks(title="Log AI Engine") as demo:
         with gr.Tab("📦 BATCH PROCESSING"):
             with gr.Row():
                 with gr.Column():
-                    csv_in = gr.File(label="UPLOAD CSV", file_types=[".csv"])
                     batch_btn = gr.Button("▶ START BATCH PROCESS", variant="primary")
                 with gr.Column():
-                    csv_out = gr.File(label="DOWNLOAD CLASSIFIED DATA")
                     stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")
             batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])
 demo.queue(default_concurrency_limit=2).launch(
@@ -203,4 +224,4 @@ demo.queue(default_concurrency_limit=2).launch(
     server_port=7860,
     theme=THEME,
     css=CUSTOM_CSS
-)

 """
 Log Classification System — HuggingFace Spaces
 Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier
+Bug fixes vs previous version:
+  - BERT latency display: no longer shows cumulative sum (was showing 2,962,635 ms).
+    Now shows real per-log wall-clock latency from classify.py fix.
+  - Added bert_wall_ms tracking in stats display so batch total is visible clearly.
 """
 from __future__ import annotations
 import io
 ]
 def get_tier_icon(tier_name: str) -> str:
+    if "Regex" in tier_name:      return "🟢"
+    if "BERT" in tier_name:       return "🔵"
+    if "Cache Hit" in tier_name:  return "⚡"
+    if "fallback" in tier_name:   return "🟠"
+    if "LLM" in tier_name:        return "🟡"
     return "⚪"
 EXAMPLE_LOGS = [
         return "—", "—", "—", "—"
     if not _model_ready:
         return "⏳ Loading...", "Warming up", "—", "—"
     t0 = time.perf_counter()
     try:
         result = classify_log(source, log_message)
         latency = (time.perf_counter() - t0) * 1000
         icon = get_tier_icon(result["tier"])
         return (
+            result["label"],
+            f"{icon} {result['tier']}",
+            f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
+            f"{latency:.4f} ms"
         )
     except Exception as e:
         return f"Error: {str(e)}", "Fail", "—", "—"
 def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
+    if file is None:
+        return None, "⚠️ Please upload a CSV file."
     progress(0, desc="🚀 Initializing Engine...")
     t0 = time.perf_counter()
     try:
+        # Generate a unique output path per user to prevent data bleeding
         unique_id = uuid.uuid4().hex
         safe_output_path = f"/tmp/classified_output_{unique_id}.csv"
         output_path, df = classify_csv(file.name, safe_output_path)
         total_time_sec = time.perf_counter() - t0
         progress(0.9, desc="📊 Calculating Metrics...")
+        total        = len(df)
         label_counts = df["predicted_label"].value_counts().to_dict()
+        tier_counts  = df["tier_used"].value_counts().to_dict()
         tier_lines = []
         for tier, count in tier_counts.items():
             tier_df = df[df["tier_used"] == tier]
+            lats    = tier_df["latency_ms"].dropna()
+            icon    = get_tier_icon(tier)
+            pct     = count / total
             if "BERT" in tier:
+                # BUG FIX: latency_ms now holds true per-log wall-clock time.
+                # Show per-log p50 AND reconstructed batch total for clarity.
+                p50       = np.percentile(lats, 50) if not lats.empty else 0
+                # Each stored value is already per-log wall time (total_wall/n),
+                # so multiplying by count reconstructs actual batch wall time.
+                batch_ms  = p50 * count
+                tier_lines.append(
+                    f"  {icon} {tier}: p50={p50:.2f} ms/log | "
+                    f"Batch total ~{batch_ms/1000:.1f} s (Over {count} logs)"
+                )
             elif "Regex" in tier:
                 p50 = np.percentile(lats, 50) if not lats.empty else 0
+                tier_lines.append(
+                    f"  {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})"
+                )
             else:
                 p50 = np.percentile(lats, 50) if not lats.empty else 0
                 p95 = np.percentile(lats, 95) if not lats.empty else 0
                 p99 = np.percentile(lats, 99) if not lats.empty else 0
+                tier_lines.append(
+                    f"  {icon} {tier}: {count} logs ({pct:.0%}) | "
+                    f"p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms"
+                )
         tier_lines_str = "\n".join(tier_lines)
+        label_lines    = "\n".join([f"  • {k}: {v}" for k, v in label_counts.items()])
         stats = (
             f"✅ Classified {total} logs in {total_time_sec:.2f} s\n\n"
             f"📊 Performance by Tier:\n{tier_lines_str}\n\n"
             f"🏷️ Label distribution:\n{label_lines}"
         )
         progress(1.0, desc="✅ Success")
         return output_path, stats
     except Exception as e:
         return None, f"❌ System Error: {str(e)}"
 # ── Theme & Layout ──────────────────────────────────────────
 THEME = gr.themes.Base(
     primary_hue="blue",
                     src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
                 with gr.Column(scale=3):
                     msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)
             run_btn = gr.Button("▶ CLASSIFY LOG", variant="primary")
             with gr.Row():
+                lbl_out  = gr.Textbox(label="PREDICTED LABEL")
                 tier_out = gr.Textbox(label="TIER USED")
                 conf_out = gr.Textbox(label="CONFIDENCE")
+                lat_out  = gr.Textbox(label="LATENCY")
             run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
             gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])
         with gr.Tab("📦 BATCH PROCESSING"):
             with gr.Row():
                 with gr.Column():
+                    csv_in    = gr.File(label="UPLOAD CSV", file_types=[".csv"])
                     batch_btn = gr.Button("▶ START BATCH PROCESS", variant="primary")
                 with gr.Column():
+                    csv_out   = gr.File(label="DOWNLOAD CLASSIFIED DATA")
                     stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")
             batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])
 demo.queue(default_concurrency_limit=2).launch(
     server_port=7860,
     theme=THEME,
     css=CUSTOM_CSS
+)